From a48212cb65e09669ed243581556529681cebba0a Mon Sep 17 00:00:00 2001 From: "iap10@labyrinth.cl.cam.ac.uk" Date: Mon, 24 Feb 2003 16:55:07 +0000 Subject: bitkeeper revision 1.93 (3e5a4e6bkPheUp3x1uufN2MS3LAB7A) Latest and Greatest version of XenoLinux based on the Linux-2.4.21-pre4 kernel. --- xenolinux-2.4.21-pre4-sparse/arch/xeno/Makefile | 93 ++ .../arch/xeno/boot/Makefile | 22 + xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in | 127 +++ xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig | 405 +++++++ .../arch/xeno/drivers/block/Makefile | 3 + .../arch/xeno/drivers/block/xl_block.c | 490 +++++++++ .../arch/xeno/drivers/block/xl_block_test.c | 225 ++++ .../arch/xeno/drivers/console/Makefile | 3 + .../arch/xeno/drivers/console/console.c | 204 ++++ .../arch/xeno/drivers/dom0/Makefile | 3 + .../arch/xeno/drivers/dom0/dom0_block.c | 27 + .../arch/xeno/drivers/dom0/dom0_core.c | 334 ++++++ .../arch/xeno/drivers/dom0/dom0_memory.c | 368 +++++++ .../arch/xeno/drivers/dom0/dom0_ops.h | 80 ++ .../arch/xeno/drivers/dom0/vfr.c | 306 ++++++ .../arch/xeno/drivers/network/Makefile | 3 + .../arch/xeno/drivers/network/network.c | 443 ++++++++ .../arch/xeno/kernel/Makefile | 15 + .../arch/xeno/kernel/entry.S | 717 ++++++++++++ .../arch/xeno/kernel/head.S | 67 ++ .../arch/xeno/kernel/hypervisor.c | 118 ++ .../arch/xeno/kernel/i386_ksyms.c | 154 +++ .../arch/xeno/kernel/i387.c | 542 ++++++++++ .../arch/xeno/kernel/init_task.c | 33 + .../arch/xeno/kernel/ioport.c | 19 + .../arch/xeno/kernel/irq.c | 1136 ++++++++++++++++++++ .../arch/xeno/kernel/ldt.c | 26 + .../arch/xeno/kernel/process.c | 453 ++++++++ .../arch/xeno/kernel/ptrace.c | 454 ++++++++ .../arch/xeno/kernel/semaphore.c | 291 +++++ .../arch/xeno/kernel/setup.c | 995 +++++++++++++++++ .../arch/xeno/kernel/signal.c | 717 ++++++++++++ .../arch/xeno/kernel/sys_i386.c | 256 +++++ .../arch/xeno/kernel/time.c | 348 ++++++ .../arch/xeno/kernel/traps.c | 565 ++++++++++ .../arch/xeno/lib/Makefile | 15 + .../arch/xeno/lib/checksum.S | 496 +++++++++ .../arch/xeno/lib/dec_and_lock.c | 40 + xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/delay.c | 47 + .../arch/xeno/lib/getuser.S | 73 ++ .../arch/xeno/lib/iodebug.c | 19 + .../arch/xeno/lib/memcpy.c | 19 + xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/mmx.c | 399 +++++++ .../arch/xeno/lib/old-checksum.c | 19 + .../arch/xeno/lib/strstr.c | 31 + .../arch/xeno/lib/usercopy.c | 190 ++++ xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/Makefile | 16 + .../arch/xeno/mm/extable.c | 62 ++ xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/fault.c | 390 +++++++ .../arch/xeno/mm/get_unmapped_area.c | 137 +++ .../arch/xeno/mm/hypervisor.c | 168 +++ xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c | 396 +++++++ .../arch/xeno/mm/mmu_context.c | 26 + .../arch/xeno/mm/pageattr.c | 175 +++ xenolinux-2.4.21-pre4-sparse/arch/xeno/vmlinux.lds | 82 ++ 55 files changed, 12842 insertions(+) create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/boot/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block_test.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile create mode 100644 
xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/i386_ksyms.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/i387.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/init_task.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ioport.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/irq.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ptrace.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/semaphore.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/signal.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/sys_i386.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/time.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/checksum.S create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/dec_and_lock.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/delay.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/getuser.S create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/iodebug.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/memcpy.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/mmx.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/old-checksum.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/strstr.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/usercopy.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/Makefile create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/extable.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/fault.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/get_unmapped_area.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/mmu_context.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/pageattr.c create mode 100644 xenolinux-2.4.21-pre4-sparse/arch/xeno/vmlinux.lds (limited to 
'xenolinux-2.4.21-pre4-sparse/arch/xeno') diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/Makefile new file mode 100644 index 0000000000..544af84605 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/Makefile @@ -0,0 +1,93 @@ +# +# xeno/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina +# Added '-march' and '-mpreferred-stack-boundary' support +# + +LD=$(CROSS_COMPILE)ld -m elf_i386 +OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S +LDFLAGS=-e stext +LINKFLAGS =-T $(TOPDIR)/arch/xeno/vmlinux.lds $(LDFLAGS) + +CFLAGS += -pipe + +# prevent gcc from keeping the stack 16 byte aligned +CFLAGS += $(shell if $(CC) -mpreferred-stack-boundary=2 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mpreferred-stack-boundary=2"; fi) + +ifdef CONFIG_M686 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUMIII +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUM4 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MK7 +CFLAGS += $(shell if $(CC) -march=athlon -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=athlon"; else echo "-march=i686 -malign-functions=4"; fi) +endif + +HEAD := arch/xeno/kernel/head.o arch/xeno/kernel/init_task.o + +SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib +SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network +SUBDIRS += arch/xeno/drivers/dom0 arch/xeno/drivers/block + +CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o +CORE_FILES += arch/xeno/drivers/console/con.o +CORE_FILES += arch/xeno/drivers/block/blk.o +CORE_FILES += arch/xeno/drivers/network/net.o +CORE_FILES += arch/xeno/drivers/dom0/dom0.o +LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a + +arch/xeno/kernel: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kernel + +arch/xeno/mm: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/mm + +arch/xeno/drivers/console: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/console + +arch/xeno/drivers/network: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/network + +arch/xeno/drivers/block: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/block + +arch/xeno/drivers/dom0: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/dom0 + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +vmlinux: arch/xeno/vmlinux.lds + +FORCE: ; + +.PHONY: bzImage compressed clean archclean archmrproper archdep + +bzImage: vmlinux + @$(MAKEBOOT) image.gz + +archclean: + @$(MAKEBOOT) clean + +archmrproper: + +archdep: + @$(MAKEBOOT) dep diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/boot/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/boot/Makefile new file mode 100644 index 0000000000..252daf50bf --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/boot/Makefile @@ -0,0 +1,22 @@ +# +# arch/xeno/boot/Makefile +# + +image.gz: image + gzip -f -9 < $< > $@ + +image: $(TOPDIR)/vmlinux + # Guest OS header -- first 8 bytes are identifier 'XenoGues'. + echo -e -n 'XenoGues' >$@ + # Guest OS header -- next 4 bytes are load address (0xC0000000). 
+ echo -e -n '\000\000\000\300' >>$@ + $(OBJCOPY) $< image.body + # Guest OS header is immediately followed by raw OS image. + # Start address must be at byte 0. + cat image.body >>$@ + rm -f image.body + +dep: + +clean: + rm -f image image.gz diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in b/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in new file mode 100644 index 0000000000..a12a1ec5c0 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in @@ -0,0 +1,127 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/config-language.txt. +# +mainmenu_name "Linux Kernel Configuration" + +define_bool CONFIG_XENO y + +define_bool CONFIG_X86 y +define_bool CONFIG_ISA y +define_bool CONFIG_SBUS n + +define_bool CONFIG_UID16 y + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Processor type and features' +choice 'Processor family' \ + "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ + Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ + Pentium-4 CONFIG_MPENTIUM4 \ + Athlon/Duron/K7 CONFIG_MK7" Pentium-Pro + + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y + define_bool CONFIG_X86_CMPXCHG y + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y + define_bool CONFIG_X86_TSC y + +if [ "$CONFIG_M686" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUM4" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 7 +fi +if [ "$CONFIG_MK7" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_USE_3DNOW y +fi + +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + +#bool 'Symmetric multi-processing support' CONFIG_SMP +#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then +# define_bool CONFIG_HAVE_DEC_LOCK y +#fi +endmenu + +mainmenu_option next_comment +comment 'General setup' + +bool 'Networking support' CONFIG_NET + +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +if [ "$CONFIG_PROC_FS" = "y" ]; then + choice 'Kernel core (/proc/kcore) format' \ + "ELF CONFIG_KCORE_ELF \ + A.OUT CONFIG_KCORE_AOUT" ELF +fi +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC + +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +source drivers/block/Config.in +define_bool CONFIG_BLK_DEV_IDE_MODES n +define_bool 
CONFIG_BLK_DEV_HD n + +source fs/Config.in + +mainmenu_option next_comment +comment 'Kernel hacking' + +bool 'Kernel debugging' CONFIG_DEBUG_KERNEL +if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then + bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM + bool ' Debug memory allocations' CONFIG_DEBUG_SLAB + bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Load all symbols for debugging' CONFIG_KALLSYMS + bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER +fi + +endmenu diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig b/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig new file mode 100644 index 0000000000..0059207429 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig @@ -0,0 +1,405 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +CONFIG_M686=y +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +# CONFIG_SMP is not set +# CONFIG_X86_UP_APIC is not set + +# +# General setup +# +CONFIG_NET=y +# CONFIG_PCI is not set +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_PM is not set +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +# CONFIG_APM_RTC_IS_GMT is not set +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_XENOLINUX_BLOCK=y + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not 
set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK=y +CONFIG_RTNETLINK=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# ATA/IDE/MFM/RLL support +# +# CONFIG_IDE is not set +# CONFIG_BLK_DEV_IDE_MODES is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION_BOOT is not set +# CONFIG_FUSION_ISENSE is not set +# CONFIG_FUSION_CTL is not set +# CONFIG_FUSION_LAN is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Network device support +# +# CONFIG_NETDEVICES is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_UNIX98_PTYS is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +# CONFIG_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_FAT_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_TMPFS is not set +# CONFIG_RAMFS is not set +# CONFIG_ISO9660_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +# CONFIG_NLS is not set + +# +# Sound +# +# CONFIG_SOUND 
is not set + +# +# USB support +# + +# +# USB Controllers +# + +# +# USB Device Class drivers +# + +# +# USB Human Interface Devices (HID) +# + +# +# Input core support is needed for USB HID +# + +# +# USB Imaging devices +# + +# +# USB Multimedia devices +# + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network adaptors +# + +# +# USB port drivers +# + +# +# USB Serial Converter support +# + +# +# USB Miscellaneous drivers +# + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_KALLSYMS=y +# CONFIG_FRAME_POINTER is not set + +# Grim cruft +# +# CONFIG_VLAN_8021Q is not set +# CONFIG_NET_PKTGEN is not set +# CONFIG_BLK_STATS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JBD_DEBUG is not set +# CONFIG_JFS_FS is not set +# CONFIG_QNX4FS_FS is not set + + + + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile new file mode 100644 index 0000000000..74a0c6c565 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile @@ -0,0 +1,3 @@ +O_TARGET := blk.o +obj-y := xl_block.o xl_block_test.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c new file mode 100644 index 0000000000..0b77e5536e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c @@ -0,0 +1,490 @@ +/****************************************************************************** + * xl_block.c + * + * Xenolinux virtual block-device driver. 
+ * + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must precede include */ +static int xlblk_major = XLBLK_MAJOR; +#include + +/* Copied from linux/ide.h */ +typedef unsigned char byte; + +void xlblk_ide_register_disk(int, unsigned long); + +#define XLBLK_MAX 32 /* Maximum minor devices we support */ +#define XLBLK_MAJOR_NAME "xhd" +#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */ +#define IDE_PARTN_MASK ((1<i_rdev); + if (minor_dev >= XLBLK_MAX) return -ENODEV; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, minor: 0x%x\n", + command, (long) argument, minor_dev); + + switch (command) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, + (long) xen_disk_info.disks[0].capacity); + return put_user(xen_disk_info.disks[0].capacity, + (unsigned long *) argument); + + case BLKRRPART: + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + break; + + case BLKSSZGET: + DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlblk_hardsect_size[minor_dev]); + return xlblk_hardsect_size[minor_dev]; + + case HDIO_GETGEO: + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + + return 0; + + default: + DPRINTK_IOCTL(" eh? unknown ioctl\n"); + break; + } + + return 0; +} + +static int xenolinux_block_check(kdev_t dev) +{ + DPRINTK("xenolinux_block_check\n"); + return 0; +} + +static int xenolinux_block_revalidate(kdev_t dev) +{ + DPRINTK("xenolinux_block_revalidate\n"); + return 0; +} + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + * block_number: block to read + * block_size: size of each block + * device: ide/hda is 768 or 0x300 + */ +int hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device) +{ + int position; + void *buffer_ma; + kdev_t phys_device = (kdev_t) 0; + unsigned long sector_number = 0; + struct gendisk *gd; + + /* + * Bail if there's no room in the request communication ring. This may be + * because we have a whole bunch of outstanding responses to process. No + * matter, as the response handler will kick the request queue. 
+ */ + if ( BLK_RING_INC(blk_ring->req_prod) == resp_cons ) + return 1; + + buffer_ma = (void *)phys_to_machine(virt_to_phys(buffer)); + + switch ( operation ) + { + case XEN_BLOCK_PROBE: + phys_device = (kdev_t) 0; + sector_number = 0; + break; + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( MAJOR(device) != XLBLK_MAJOR ) + panic("error: xl_block::hypervisor_request: " + "unknown device [0x%x]\n", device); + phys_device = MKDEV(IDE0_MAJOR, 0); + /* Compute real buffer location on disk */ + sector_number = block_number; + if ( (gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL ) + sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect; + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure & trap to the hypervisor */ + position = blk_ring->req_prod; + blk_ring->ring[position].req.id = id; + blk_ring->ring[position].req.operation = operation; + blk_ring->ring[position].req.buffer = buffer_ma; + blk_ring->ring[position].req.block_number = block_number; + blk_ring->ring[position].req.block_size = block_size; + blk_ring->ring[position].req.device = phys_device; + blk_ring->ring[position].req.sector_number = sector_number; + + blk_ring->req_prod = BLK_RING_INC(position); + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +static void do_xlblk_request (request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); + + while ( !rq->plugged && !QUEUE_EMPTY ) + { + if ( (req = CURRENT) == NULL ) goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) rw = READ; + if ((rw != READ) && (rw != WRITE)) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + bh = req->bh; + while ( bh != NULL ) + { + full = hypervisor_request( + bh, (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size, bh->b_dev); + + if ( full ) goto out; + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh = req->bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. 
*/ + if ( end_that_request_first(req, 1, "XenBlk") ) BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) HYPERVISOR_block_io_op(); +} + + +static struct block_device_operations xenolinux_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + int i; + unsigned long flags; + struct buffer_head *bh; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; + i != blk_ring->resp_prod; + i = BLK_RING_INC(i) ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp; + if ( (bh = bret->id) != NULL ) bh->b_end_io(bh, 1); + } + + resp_cons = i; + + /* KAF: We can push work down at this point. We have the lock. */ + do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +int __init xlblk_init(void) +{ + int i, error, result; + + /* This mapping was created early at boot time. */ + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = 0; + + error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 0, + "xlblk-response", NULL); + if (error) { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + memset (&xen_disk_info, 0, sizeof(xen_disk_info)); + xen_disk_info.count = 0; + + if ( hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info, + 0, 0, (kdev_t) 0) ) + BUG(); + HYPERVISOR_block_io_op(); + while ( blk_ring->resp_prod != 1 ) barrier(); + for ( i = 0; i < xen_disk_info.count; i++ ) + { + printk (KERN_ALERT " %2d: type: %d, capacity: %ld\n", + i, xen_disk_info.disks[i].type, + xen_disk_info.disks[i].capacity); + } + + SET_MODULE_OWNER(&xenolinux_block_fops); + result = register_blkdev(xlblk_major, "block", &xenolinux_block_fops); + if (result < 0) { + printk (KERN_ALERT "xenolinux block: can't get major %d\n", + xlblk_major); + return result; + } + + /* initialize global arrays in drivers/block/ll_rw_block.c */ + for (i = 0; i < XLBLK_MAX; i++) { + xlblk_blk_size[i] = xen_disk_info.disks[0].capacity; + xlblk_blksize_size[i] = 512; + xlblk_hardsect_size[i] = 512; + xlblk_max_sectors[i] = 128; + } + xlblk_read_ahead = 8; + + blk_size[xlblk_major] = xlblk_blk_size; + blksize_size[xlblk_major] = xlblk_blksize_size; + hardsect_size[xlblk_major] = xlblk_hardsect_size; + read_ahead[xlblk_major] = xlblk_read_ahead; + max_sectors[xlblk_major] = xlblk_max_sectors; + + blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. 
+ */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0); + + xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity); + + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver installed [device: %d]\n", + xlblk_major); + return 0; + + fail: + return error; +} + +void xlblk_ide_register_disk(int idx, unsigned long capacity) +{ + int units; + int minors; + struct gendisk *gd; + + /* plagarized from ide-probe.c::init_gendisk */ + + units = 2; /* from ide.h::MAX_DRIVES */ + + minors = units * (1<sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + gd->major = xlblk_major; + gd->major_name = XLBLK_MAJOR_NAME; + gd->minor_shift = IDE_PARTN_BITS; + gd->max_p = 1<nr_real = units; + gd->real_devices = NULL; + gd->next = NULL; + gd->fops = &xenolinux_block_fops; + gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + + if (gd->de_arr) + memset (gd->de_arr, 0, sizeof *gd->de_arr * units); + + if (gd->flags) + memset (gd->flags, 0, sizeof *gd->flags * units); + + add_gendisk(gd); + + xen_disk_info.disks[idx].gendisk = gd; + + /* default disk size is just a big number. in the future, we + need a message to probe the devices to determine the actual size */ + register_disk(gd, MKDEV(xlblk_major, 0), 1< +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/******************************************************************/ + +static struct proc_dir_entry *bdt; +static blk_ring_req_entry_t meta; +static char * data; + +static int proc_read_bdt(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + switch (meta.operation) + { + case XEN_BLOCK_READ : + case XEN_BLOCK_WRITE : + { + return proc_dump_block(page, start, off, count, eof, data); + } + case XEN_BLOCK_DEBUG : + { + return proc_dump_debug(page, start, off, count, eof, data); + } + default : + { + printk(KERN_ALERT + "block device test error: unknown operation [%c]\n", + meta.operation); + return -EINVAL; + } + } +} + +int proc_dump_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + + sprintf (header, "Block Device Test: Debug Dump\n\n"); + + sprintf (dump, "%s\n", meta.buffer); + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_dump_block(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + char temp[100]; + int loop; + + sprintf (header, "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n", + meta.operation == XEN_BLOCK_WRITE ? 
"write" : "read", + meta.block_number, meta.block_number, + meta.block_size, meta.block_size, + meta.device); + + sprintf (dump, "%s", header); + + if (meta.buffer) + { + for (loop = 0; loop < 100; loop++) + { + int i = meta.buffer[loop]; + + if (loop % 8 == 0) + { + sprintf (temp, "[%2d] ", loop); + strcat(dump, temp); + } + else if (loop % 2 == 0) + { + strcat(dump, " "); + } + + sprintf (temp, " 0x%02x", i & 255); + strcat(dump, temp); + if ((loop + 1) % 8 == 0) + { + strcat(dump, "\n"); + } + } + strcat(dump, "\n\n"); + } + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_write_bdt(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char *local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL); + char opcode; + int block_number = 0; + int block_size = 0; + int device = 0; + + if (copy_from_user(local, buffer, count)) + { + return -EFAULT; + } + local[count] = '\0'; + + sscanf(local, "%c %i %i %i", + &opcode, &block_number, &block_size, &device); + + if (opcode == 'r' || opcode == 'R') + { + meta.operation = XEN_BLOCK_READ; + } + else if (opcode == 'w' || opcode == 'W') + { + meta.operation = XEN_BLOCK_WRITE; + } + else if (opcode == 'd' || opcode == 'D') + { + meta.operation = XEN_BLOCK_DEBUG; + block_size = 10000; + } + else + { + printk(KERN_ALERT + "block device test error: unknown opcode [%c]\n", opcode); + return -EINVAL; + } + + if (data) + { + kfree(data); + } + data = kmalloc(block_size * sizeof(char), GFP_KERNEL); + if (data == NULL) + { + kfree(local); + return -ENOMEM; + } + + meta.block_number = block_number; + meta.block_size = block_size; + meta.device = device; + meta.buffer = data; + + /* submit request */ + hypervisor_request(0, meta.operation, meta.buffer, + meta.block_number, meta.block_size, + meta.device); + HYPERVISOR_block_io_op(); + mdelay(1000); /* should wait for a proper acknowledgement/response. 
*/ + + kfree(local); + return count; +} + + +static int __init init_module(void) +{ + int return_value = 0; + + /* create proc entry */ + bdt = create_proc_entry("bdt", 0644, NULL); + if (bdt == NULL) + { + return_value = -ENOMEM; + goto error; + } + bdt->data = NULL; + bdt->read_proc = proc_read_bdt; + bdt->write_proc = proc_write_bdt; + bdt->owner = THIS_MODULE; + + memset(&meta, 0, sizeof(meta)); + + /* success */ + printk(KERN_ALERT "XenoLinux Block Device Test installed\n"); + return 0; + + error: + return return_value; +} + +static void __exit cleanup_module(void) +{ + if (data) + { + kfree(data); + } + printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n"); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile new file mode 100644 index 0000000000..5a0e7b36b1 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile @@ -0,0 +1,3 @@ +O_TARGET := con.o +obj-y := console.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c new file mode 100644 index 0000000000..11548f877e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c @@ -0,0 +1,204 @@ +/****************************************************************************** + * console.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/******************** Kernel console driver ********************************/ + +static void kconsole_write(struct console *co, const char *s, unsigned count) +{ +#define STRLEN 256 + static char str[STRLEN]; + static int pos = 0; + int len; + + /* We buffer output until we see a newline, or until the buffer is full. */ + while ( count != 0 ) + { + len = ((STRLEN - pos) > count) ? count : STRLEN - pos; + memcpy(str + pos, s, len); + pos += len; + s += len; + count -= len; + if ( (pos == STRLEN) || (str[pos-1] == '\n') ) + { + (void)HYPERVISOR_console_write(str, pos); + pos = 0; + } + } +} + +static kdev_t kconsole_device(struct console *c) +{ + /* + * This is the magic that binds our "struct console" to our + * "tty_struct", defined below. 
+ */ + return MKDEV(TTY_MAJOR, 0); +} + +static struct console kconsole_info = { + name: "xenocon", + write: kconsole_write, + device: kconsole_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xeno_console_init(void) +{ + register_console(&kconsole_info); +} + + +/******************** Initial /dev/console *********************************/ + + +static struct tty_driver console_driver; +static int console_refcount; +static struct tty_struct *console_table[1]; +static struct termios *console_termios[1]; +static struct termios *console_termios_locked[1]; + +static int console_write_room(struct tty_struct *tty) +{ + return INT_MAX; +} + +static int console_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static inline int console_xmit(int ch) +{ + char _ch = ch; + kconsole_write(NULL, &_ch, 1); + return 1; +} + +static int console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + { + return -EINVAL; + } + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + { + __get_user(ch, buf + i); + } + else + { + ch = buf[i]; + } + console_xmit(ch); + } + return i; +} + +static void console_put_char(struct tty_struct *tty, u_char ch) +{ + console_xmit(ch); +} + +static int console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + tty->driver_data = NULL; + + return 0; +} + +static void console_close(struct tty_struct *tty, struct file *filp) +{ + MOD_DEC_USE_COUNT; +} + +static int __init console_ini(void) +{ + memset(&console_driver, 0, sizeof(struct tty_driver)); + console_driver.magic = TTY_DRIVER_MAGIC; + console_driver.driver_name = "xeno_console"; + console_driver.name = "console"; + console_driver.major = TTY_MAJOR; + console_driver.minor_start = 0; + console_driver.num = 1; + console_driver.type = TTY_DRIVER_TYPE_SERIAL; + console_driver.subtype = SERIAL_TYPE_NORMAL; + console_driver.init_termios = tty_std_termios; + console_driver.flags = TTY_DRIVER_REAL_RAW; + console_driver.refcount = &console_refcount; + console_driver.table = console_table; + console_driver.termios = console_termios; + console_driver.termios_locked = console_termios_locked; + /* Functions */ + console_driver.open = console_open; + console_driver.close = console_close; + console_driver.write = console_write; + console_driver.write_room = console_write_room; + console_driver.put_char = console_put_char; + console_driver.chars_in_buffer = console_chars_in_buffer; + + if ( tty_register_driver(&console_driver) ) + { + printk(KERN_ERR "Couldn't register Xeno console driver\n"); + } + else + { + printk("Xeno console successfully installed\n"); + } + + return 0; +} + +static void __exit console_fin(void) +{ + int ret; + + ret = tty_unregister_driver(&console_driver); + if ( ret != 0 ) + { + printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); + } +} + +module_init(console_ini); +module_exit(console_fin); + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile new file mode 100644 index 0000000000..4738fc0ba4 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := dom0.o +obj-y := dom0_memory.o dom0_core.o vfr.o +include $(TOPDIR)/Rules.make diff --git 
a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c new file mode 100644 index 0000000000..97d4a65b78 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c @@ -0,0 +1,27 @@ +/* + * domain 0 block driver interface + * + */ + +#include +#include +#include +#include + +static int __init init_module(void) +{ + request_module("xl_block"); + printk("Successfully installed domain 0 block interface\n"); + + + return 0; +} + +static void __exit cleanup_module(void) +{ + printk("Successfully de-installed domain-0 block interface\n"); + return 0; +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c new file mode 100644 index 0000000000..f8af85358b --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c @@ -0,0 +1,334 @@ +/****************************************************************************** + * dom0_core.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "dom0_ops.h" + +/* Private proc-file data structures. */ +typedef struct proc_data { + unsigned int domain; + unsigned long map_size; +} dom_procdata_t; + +typedef struct proc_mem_data { + unsigned long pfn; + int tot_pages; +} proc_memdata_t; + +#define XENO_BASE "xeno" +#define DOM0_CMD_INTF "dom0_cmd" +#define DOM0_NEWDOM "new_dom_data" + +#define MAX_LEN 16 +#define DOM_DIR "dom" +#define DOM_MEM "mem" +#define DOM_VIF "vif" + +#define MAP_DISCONT 1 + +static struct proc_dir_entry *xeno_base; +static struct proc_dir_entry *dom0_cmd_intf; +static struct proc_dir_entry *proc_ft; + +unsigned long direct_mmap(unsigned long, unsigned long, pgprot_t, int, int); +int direct_unmap(unsigned long, unsigned long); +int direct_disc_unmap(unsigned long, unsigned long, int); + +static unsigned char readbuf[1204]; + +static int cmd_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +static ssize_t dom_vif_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + char hyp_buf[128]; // Hypervisor is going to write its reply here. + network_op_t op; + static int finished = 0; + + // This seems to be the only way to make the OS stop making read requests + // to the file. When we use the fileoperations version of read, offset + // seems to be ignored altogether. + + if (finished) + { + finished = 0; + return 0; + } + + op.cmd = NETWORK_OP_VIFQUERY; + op.u.vif_query.domain = (unsigned int) ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + op.u.vif_query.buf = hyp_buf; + + strcpy(hyp_buf, "Error getting domain's vif list from hypervisor.\n"); // This will be replaced if everything works. 
+ + (void)HYPERVISOR_network_op(&op); + + if (*off >= (strlen(hyp_buf)+1)) return 0; + + copy_to_user(buff, hyp_buf, strlen(hyp_buf)); + + finished = 1; + + return strlen(hyp_buf)+1; +} + +struct file_operations dom_vif_ops = { + read: dom_vif_read +}; + + +static void create_proc_dom_entries(int dom) +{ + struct proc_dir_entry * dir; + dom_procdata_t * dom_data; + char dir_name[MAX_LEN]; + struct proc_dir_entry * file; + + snprintf(dir_name, MAX_LEN, "%s%d", DOM_DIR, dom); + + dom_data = (dom_procdata_t *)kmalloc(sizeof(dom_procdata_t), GFP_KERNEL); + dom_data->domain = dom; + + dir = proc_mkdir(dir_name, xeno_base); + dir->data = dom_data; + + file = create_proc_entry(DOM_VIF, 0600, dir); + if (file != NULL) + { + file->owner = THIS_MODULE; + file->nlink = 1; + file->proc_fops = &dom_vif_ops; + file->data = (void *) dom; + } +} + +static ssize_t dom_mem_write(struct file * file, const char * buff, + size_t size , loff_t * off) +{ + dom_mem_t mem_data; + + copy_from_user(&mem_data, (dom_mem_t *)buff, sizeof(dom_mem_t)); + + if(direct_disc_unmap(mem_data.vaddr, mem_data.start_pfn, + mem_data.tot_pages) == 0){ + return sizeof(sizeof(dom_mem_t)); + } else { + return -1; + } +} + +static ssize_t dom_mem_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + unsigned long addr; + pgprot_t prot; + + proc_memdata_t * mem_data = (proc_memdata_t *)((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + + prot = PAGE_SHARED; + + /* remap the range using xen specific routines */ + + addr = direct_mmap(mem_data->pfn << PAGE_SHIFT, mem_data->tot_pages << PAGE_SHIFT, prot, MAP_DISCONT, mem_data->tot_pages); + + copy_to_user((unsigned long *)buff, &addr, sizeof(addr)); + + return sizeof(addr); +} + +struct file_operations dom_mem_ops = { + read: dom_mem_read, + write: dom_mem_write, +}; + +static int dom_map_mem(unsigned int dom, unsigned long pfn, int tot_pages) +{ + int ret = -ENOENT; + struct proc_dir_entry * pd = xeno_base->subdir; + struct proc_dir_entry * file; + proc_memdata_t * memdata; + + while(pd != NULL){ + + if((pd->mode & S_IFDIR) && ((dom_procdata_t *)pd->data)->domain == dom){ + + /* check if there is already an entry for mem and if so + * remove it. + */ + remove_proc_entry(DOM_MEM, pd); + + /* create new entry with parameters describing what to do + * when it is mmaped. 
+ */ + file = create_proc_entry(DOM_MEM, 0600, pd); + if(file != NULL) + { + file->owner = THIS_MODULE; + file->nlink = 1; + file->proc_fops = &dom_mem_ops; + + memdata = (proc_memdata_t *)kmalloc(sizeof(proc_memdata_t), GFP_KERNEL); + memdata->pfn = pfn; + memdata->tot_pages = tot_pages; + file->data = memdata; + + ret = 0; + break; + } + + ret = -EAGAIN; + break; + } + pd = pd->next; + } + + return ret; +} + +/* function used to retrieve data associated with new domain */ +static ssize_t dom_data_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + dom0_newdomain_t * dom_data = (dom0_newdomain_t *) + ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + + copy_to_user((dom0_newdomain_t *)buff, dom_data, sizeof(dom0_newdomain_t)); + + remove_proc_entry(DOM0_NEWDOM, xeno_base); + + kfree(dom_data); + + return sizeof(dom0_newdomain_t); +} + +struct file_operations newdom_data_fops = { + read: dom_data_read, +}; + +static int cmd_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + dom0_op_t op; + int ret = 0; + struct proc_dir_entry * new_dom_id; + dom0_newdomain_t * params; + int i; + unsigned long p; + + copy_from_user(&op, buffer, sizeof(dom0_op_t)); + + /* do some sanity checks */ + if(op.cmd > MAX_CMD){ + ret = -ENOSYS; + goto out; + } + + if ( op.cmd == MAP_DOM_MEM ) + { + ret = dom_map_mem(op.u.dommem.domain, op.u.dommem.start_pfn, + op.u.dommem.tot_pages); + } + else if ( op.cmd == DO_PGUPDATES ) + { + ret = HYPERVISOR_pt_update(op.u.pgupdate.pgt_update_arr, + op.u.pgupdate.num_pgt_updates); + } + else + { + ret = HYPERVISOR_dom0_op(&op); + + /* if new domain created, create proc entries */ + if(op.cmd == DOM0_NEWDOMAIN){ + create_proc_dom_entries(ret); + + params = (dom0_newdomain_t *)kmalloc(sizeof(dom0_newdomain_t), + GFP_KERNEL); + params->memory_kb = op.u.newdomain.memory_kb; + params->pg_head = op.u.newdomain.pg_head; + params->num_vifs = op.u.newdomain.num_vifs; + params->domain = op.u.newdomain.domain; + + /* now notify user space of the new domain's id */ + new_dom_id = create_proc_entry(DOM0_NEWDOM, 0600, xeno_base); + if ( new_dom_id != NULL ) + { + new_dom_id->owner = THIS_MODULE; + new_dom_id->nlink = 1; + new_dom_id->proc_fops = &newdom_data_fops; + new_dom_id->data = (void *)params; + } + + } + + } + +out: + return ret; + +} + +static int __init init_module(void) +{ + /* xeno proc root setup */ + xeno_base = proc_mkdir(XENO_BASE, &proc_root); + + /* xeno control interface */ + *readbuf = '\0'; + dom0_cmd_intf = create_proc_entry (DOM0_CMD_INTF, 0600, xeno_base); + if ( dom0_cmd_intf != NULL ) + { + dom0_cmd_intf->owner = THIS_MODULE; + dom0_cmd_intf->nlink = 1; + dom0_cmd_intf->read_proc = cmd_read_proc; + dom0_cmd_intf->write_proc = cmd_write_proc; + } + + /* set up /proc entries for dom 0 */ + create_proc_dom_entries(0); + + return 0; +} + + +static void __exit cleanup_module(void) +{ + if ( dom0_cmd_intf == NULL ) return; + remove_proc_entry("dom0", &proc_root); + dom0_cmd_intf = NULL; +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c new file mode 100644 index 0000000000..9d14070a1e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c @@ -0,0 +1,368 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include 
"dom0_ops.h" + +#define MAP_CONT 0 +#define MAP_DISCONT 1 + +extern struct list_head * find_direct(struct list_head *, unsigned long); + +/* + * bd240: functions below perform direct mapping to the real physical pages + * needed for mapping various hypervisor specific structures needed in dom0 + * userspace by various management applications such as domain builder etc. + */ + +#define direct_set_pte(pteptr, pteval) queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, (pteval).pte_low) + +#define direct_pte_clear(pteptr) queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, 0) + +#define __direct_pte(x) ((pte_t) { (x) } ) +#define __direct_mk_pte(page_nr,pgprot) __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define direct_mk_pte_phys(physpage, pgprot) __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline void forget_pte(pte_t page) +{ + if (!pte_none(page)) { + printk("forget_pte: old mapping existed!\n"); + BUG(); + } +} + +static inline void direct_remappte_range(pte_t * pte, unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t prot) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + do { + pte_t oldpage; + oldpage = ptep_get_and_clear(pte); + + direct_set_pte(pte, direct_mk_pte_phys(phys_addr, prot)); + + forget_pte(oldpage); + address += PAGE_SIZE; + phys_addr += PAGE_SIZE; + pte++; + } while (address && (address < end)); + +} + +static inline int direct_remappmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t prot) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + phys_addr -= address; + do { + pte_t * pte = pte_alloc(mm, pmd, address); + if (!pte) + return -ENOMEM; + direct_remappte_range(pte, address, end - address, address + phys_addr, prot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +/* Note: this is only safe if the mm semaphore is held when called. 
*/ +int direct_remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long beg = from; + unsigned long end = from + size; + struct mm_struct *mm = current->mm; + + phys_addr -= from; + dir = pgd_offset(mm, from); + flush_cache_range(mm, beg, end); + if (from >= end) + BUG(); + + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, from); + error = -ENOMEM; + if (!pmd) + break; + error = direct_remappmd_range(mm, pmd, from, end - from, phys_addr + from, prot); + if (error) + break; + from = (from + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (from && (from < end)); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); + return error; +} + +/* + * used for remapping discontiguous bits of domain's memory, pages to map are + * found from frame table beginning at the given first_pg index + */ +int direct_remap_disc_page_range(unsigned long from, + unsigned long first_pg, int tot_pages, pgprot_t prot) +{ + dom0_op_t dom0_op; + unsigned long *pfns = get_free_page(GFP_KERNEL); + unsigned long start = from; + int pages, i; + + while ( tot_pages != 0 ) + { + dom0_op.cmd = DOM0_GETMEMLIST; + dom0_op.u.getmemlist.start_pfn = first_pg; + pages = 1023; + dom0_op.u.getmemlist.num_pfns = 1024; + if ( tot_pages < 1024 ) + dom0_op.u.getmemlist.num_pfns = pages = tot_pages; + dom0_op.u.getmemlist.buffer = pfns; + (void)HYPERVISOR_dom0_op(&dom0_op); + first_pg = pfns[1023]; + + for ( i = 0; i < pages; i++ ) + { + if(direct_remap_page_range(start, pfns[i] << PAGE_SHIFT, + PAGE_SIZE, prot)) + goto out; + start += PAGE_SIZE; + tot_pages--; + } + } + +out: + free_page(pfns); + return tot_pages; +} + +/* below functions replace standard sys_mmap and sys_munmap which are absolutely useless + * for direct memory mapping. direct_zap* functions are minor ammendments to the + * original versions in mm/memory.c. the changes are to enable unmapping of real physical + * addresses. + */ + +unsigned long direct_mmap(unsigned long phys_addr, unsigned long size, + pgprot_t prot, int flag, int tot_pages) +{ + direct_mmap_node_t * dmmap; + struct list_head * entry; + unsigned long addr; + int ret = 0; + + if(!capable(CAP_SYS_ADMIN)){ + ret = -EPERM; + goto out; + } + + /* get unmapped area invokes xen specific arch_get_unmapped_area */ + addr = get_unmapped_area(NULL, 0, size, 0, 0); + if(addr & ~PAGE_MASK){ + ret = -ENOMEM; + goto out; + } + + /* add node on the list of directly mapped areas, make sure the + * list remains sorted. 
+ */ + dmmap = (direct_mmap_node_t *)kmalloc(sizeof(direct_mmap_node_t), GFP_KERNEL); + dmmap->vm_start = addr; + dmmap->vm_end = addr + size; + entry = find_direct(¤t->mm->context.direct_list, addr); + if(entry != ¤t->mm->context.direct_list){ + list_add_tail(&dmmap->list, entry); + } else { + list_add_tail(&dmmap->list, ¤t->mm->context.direct_list); + } + + /* and perform the mapping */ + if(flag == MAP_DISCONT){ + ret = direct_remap_disc_page_range(addr, phys_addr >> PAGE_SHIFT, + tot_pages, prot); + } else { + ret = direct_remap_page_range(addr, phys_addr, size, prot); + } + + if(ret == 0) + ret = addr; + +out: + return ret; +} + +/* most of the checks, refcnt updates, cache stuff have been thrown out as they are not + * needed + */ +static inline int direct_zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, + unsigned long size) +{ + unsigned long offset; + pte_t * ptep; + int freed = 0; + + if (pmd_none(*pmd)) + return 0; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + return 0; + } + ptep = pte_offset(pmd, address); + offset = address & ~PMD_MASK; + if (offset + size > PMD_SIZE) + size = PMD_SIZE - offset; + size &= PAGE_MASK; + for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { + pte_t pte = *ptep; + if (pte_none(pte)) + continue; + freed ++; + direct_pte_clear(ptep); + } + + return freed; +} + +static inline int direct_zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, + unsigned long address, unsigned long size) +{ + pmd_t * pmd; + unsigned long end; + int freed; + + if (pgd_none(*dir)) + return 0; + if (pgd_bad(*dir)) { + pgd_ERROR(*dir); + pgd_clear(dir); + return 0; + } + pmd = pmd_offset(dir, address); + end = address + size; + if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) + end = ((address + PGDIR_SIZE) & PGDIR_MASK); + freed = 0; + do { + freed += direct_zap_pte_range(tlb, pmd, address, end - address); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address < end); + return freed; +} + +/* + * remove user pages in a given range. + */ +void direct_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) +{ + mmu_gather_t *tlb; + pgd_t * dir; + unsigned long start = address, end = address + size; + int freed = 0; + + dir = pgd_offset(mm, address); + + /* + * This is a long-lived spinlock. That's fine. + * There's no contention, because the page table + * lock only protects against kswapd anyway, and + * even if kswapd happened to be looking at this + * process we _want_ it to get stuck. 
+ */ + if (address >= end) + BUG(); + spin_lock(&mm->page_table_lock); + flush_cache_range(mm, address, end); + tlb = tlb_gather_mmu(mm); + + do { + freed += direct_zap_pmd_range(tlb, dir, address, end - address); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + + /* this will flush any remaining tlb entries */ + tlb_finish_mmu(tlb, start, end); + + /* decrementing rss removed */ + + spin_unlock(&mm->page_table_lock); +} + +int direct_unmap(unsigned long addr, unsigned long size) +{ + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + if(node->vm_start == addr) + break; + curr = curr->next; + } + + if(curr == direct_list) + return -1; + + list_del(&node->list); + kfree(node); + + direct_zap_page_range(current->mm, addr, size); + + return 0; +} + +int direct_disc_unmap(unsigned long from, unsigned long first_pg, int tot_pages) +{ + int count = 0; + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + + if(node->vm_start == from) + break; + curr = curr->next; + } + + if(curr == direct_list) + return -1; + + list_del(&node->list); + kfree(node); + + while(count < tot_pages){ + direct_zap_page_range(current->mm, from, PAGE_SIZE); + from += PAGE_SIZE; + count++; + } + + return 0; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h new file mode 100644 index 0000000000..d98ce1b1eb --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. 
+ * + * Copyright (c) 2002, K A Fraser, B Dragovic + */ + +#define DOM0_NEWDOMAIN 0 +#define DOM0_KILLDOMAIN 1 +#define DOM0_GETMEMLIST 2 +#define DOM0_STARTDOM 4 +#define MAP_DOM_MEM 6 /* Not passed down to Xen */ +#define DO_PGUPDATES 7 /* Not passed down to Xen */ +#define MAX_CMD 8 + +#define MAX_CMD_LEN 256 + +typedef struct dom0_newdomain_st +{ + unsigned int domain; + unsigned int memory_kb; + unsigned int num_vifs; // temporary + unsigned long pg_head; // return parameter +} dom0_newdomain_t; + +typedef struct dom0_killdomain_st +{ + unsigned int domain; +} dom0_killdomain_t; + +typedef struct dom0_getmemlist_st +{ + unsigned long start_pfn; + unsigned long num_pfns; + void *buffer; +} dom0_getmemlist_t; + +/* This is entirely processed by XenoLinux */ +typedef struct dom_mem +{ + unsigned int domain; + unsigned long vaddr; + unsigned long start_pfn; + int tot_pages; +} dom_mem_t; + +/* This is entirely processed by XenoLinux */ +typedef struct dom_pgupdate +{ + unsigned long pgt_update_arr; + unsigned long num_pgt_updates; +} dom_pgupdate_t; + +typedef struct domain_launch +{ + unsigned int domain; + unsigned long l2_pgt_addr; + unsigned long virt_load_addr; + unsigned long virt_shinfo_addr; + unsigned long virt_startinfo_addr; + unsigned int num_vifs; + char cmd_line[MAX_CMD_LEN]; +} dom_meminfo_t; + +typedef struct dom0_op_st +{ + unsigned long cmd; + union + { + dom0_newdomain_t newdomain; + dom0_killdomain_t killdomain; + dom0_getmemlist_t getmemlist; + dom_mem_t dommem; + dom_pgupdate_t pgupdate; + dom_meminfo_t meminfo; + } + u; +} dom0_op_t; + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c new file mode 100644 index 0000000000..13fe25ec9c --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c @@ -0,0 +1,306 @@ +/****************************************************************************** + * vfr.c + * + * Interface to the virtual firewall/router. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static struct proc_dir_entry *proc_vfr; + +static unsigned char readbuf[1024]; + +/* Helpers, implemented at the bottom. */ +u32 getipaddr(const char *buff, unsigned int len); +u16 antous(const char *buff, int len); +int anton(const char *buff, int len); + +static int vfr_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +/* The format for the vfr interface is as follows: + * + * COMMAND = [= [...]] + * + * where: + * + * COMMAND = { ACCEPT | COUNT } + * + * field=val pairs are as follows: + * + * field = { srcaddr | dstaddr } + * val is a dot seperated, numeric IP address. + * + * field = { srcport | dstport } + * val is a (16-bit) unsigned int + * + * field = { proto } + * val = { IP | TCP | UDP | ARP } + * + */ + +#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ + ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) + +static int vfr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. + + len = count; + ts = te = 0; + + memset(&op, 0, sizeof(network_op_t)); + + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. 
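
The other side of this interface is simply a string written to /proc/vfr in the format described in the comment above; a small userspace sketch, not part of this patch, with field names taken from the parser below:

    /* Sketch: install an ACCEPT rule for TCP traffic to port 80.
     * The rule text must match what vfr_write_proc() parses:
     *   COMMAND ACTION field=val field=val ...                          */
    #include <stdio.h>
    #include <string.h>

    static int vfr_add_http_rule(const char *srcaddr, const char *dstaddr)
    {
        char rule[256];
        FILE *f = fopen("/proc/vfr", "w");

        if (f == NULL)
            return -1;
        snprintf(rule, sizeof(rule),
                 "ADD ACCEPT srcaddr=%s dstaddr=%s dstport=80 proto=tcp",
                 srcaddr, dstaddr);
        fwrite(rule, 1, strlen(rule), f);
        fclose(f);
        return 0;
    }
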
+ te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) + { + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; + } + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; + } + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; + } + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) + { + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) + while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcint", fl) == 0) + { + op.u.net_rule.src_interface = anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstint", fl) == 0) + { + op.u.net_rule.dst_interface = anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + + } + } + + doneparsing: + ret = HYPERVISOR_network_op(&op); + return(len); + + bad: + return(len); + + +} + +static int __init init_module(void) +{ + *readbuf = '\0'; + proc_vfr = create_proc_entry ("vfr", 0600, &proc_root); + if ( proc_vfr != NULL ) + { + proc_vfr->owner = THIS_MODULE; + proc_vfr->nlink = 1; + 
proc_vfr->read_proc = vfr_read_proc; + proc_vfr->write_proc = vfr_write_proc; + printk("Successfully installed virtual firewall/router interface\n"); + } + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( proc_vfr == NULL ) return; + remove_proc_entry("vfr", &proc_root); + proc_vfr = NULL; +} + +module_init(init_module); +module_exit(cleanup_module); + +/* Helper functions start here: */ + +int anton(const char *buff, int len) +{ + int ret; + char c; + int sign = 1; + + ret = 0; + + if (len == 0) return 0; + if (*buff == '-') { sign = -1; buff++; len--; } + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + ret *= sign; + return ret; +} + +u16 antous(const char *buff, int len) +{ + u16 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u32 getipaddr(const char *buff, unsigned int len) +{ + int i; + char c; + u32 ret, val; + + ret = 0; val = 0; + + while ( len ) + { + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. + ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; + } + ret = ret << 8; + ret += val; + + return (ret); +} + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile new file mode 100644 index 0000000000..b44a288a5b --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := net.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c new file mode 100644 index 0000000000..4c4ace1006 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c @@ -0,0 +1,443 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. 
+ * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define NET_TX_IRQ _EVENT_NET_TX +#define NET_RX_IRQ _EVENT_NET_RX + +#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) +#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) + +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void network_free_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +/* + * RX RING: RX_IDX <= rx_cons <= rx_prod + * TX RING: TX_IDX <= tx_cons <= tx_prod + * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor) + */ +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + struct sk_buff **tx_skb_ring; + struct sk_buff **rx_skb_ring; + atomic_t tx_entries; + unsigned int rx_idx, tx_idx, tx_full; + net_ring_t *net_ring; + spinlock_t tx_lock; +}; + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + int error = 0; + + np->rx_idx = np->tx_idx = np->tx_full = 0; + + memset(&np->stats, 0, sizeof(np->stats)); + + spin_lock_init(&np->tx_lock); + + atomic_set(&np->tx_entries, 0); + + np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0; + np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0; + np->net_ring->tx_ring = NULL; + np->net_ring->rx_ring = NULL; + + np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), + GFP_KERNEL); + np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), + GFP_KERNEL); + if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) || + (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) ) + { + printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name); + error = -ENOBUFS; + goto fail; + } + + network_alloc_rx_buffers(dev); + + error = request_irq(NET_RX_IRQ, network_rx_int, 0, + "net-rx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate receive interrupt\n", + dev->name); + network_free_rx_buffers(dev); + goto fail; + } + + error = request_irq(NET_TX_IRQ, network_tx_int, 0, + "net-tx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n", + dev->name); + free_irq(NET_RX_IRQ, dev); + network_free_rx_buffers(dev); + goto fail; + } + + printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; + + fail: + if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); + if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); + if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); + if ( np->tx_skb_ring ) kfree(np->tx_skb_ring); + kfree(np); + return error; +} + + +static void 
network_tx_buf_gc(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) ) + { + skb = np->tx_skb_ring[i]; + dev_kfree_skb_any(skb); + atomic_dec(&np->tx_entries); + } + + np->tx_idx = i; + + if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) + { + np->tx_full = 0; + netif_wake_queue(dev); + } + + spin_unlock_irqrestore(&np->tx_lock, flags); +} + +inline unsigned long get_ppte(unsigned long addr) +{ + unsigned long ppte; + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + pgd = pgd_offset_k(addr); + + if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); + + pmd = pmd_offset(pgd, addr); + if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); + + ptep = pte_offset(pmd, addr); + ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep)); + + return ppte; +} + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + + for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) + { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( skb == NULL ) break; + skb->dev = dev; + skb_reserve(skb, 2); /* word align the IP header */ + np->rx_skb_ring[i] = skb; + np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head); + np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ + } + + np->net_ring->rx_prod = i; + + np->net_ring->rx_event = RX_RING_INC(np->rx_idx); + + HYPERVISOR_net_update(); +} + + +static void network_free_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + dev_kfree_skb(skb); + } +} + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + struct net_private *np = (struct net_private *)dev->priv; + + if ( np->tx_full ) + { + printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + i = np->net_ring->tx_prod; + + if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) + { + struct sk_buff *new_skb = alloc_skb(RX_BUF_SIZE, GFP_KERNEL); + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + kfree_skb(skb); + skb = new_skb; + } + + np->tx_skb_ring[i] = skb; + np->net_ring->tx_ring[i].addr = + (unsigned long)phys_to_machine(virt_to_phys(skb->data)); + np->net_ring->tx_ring[i].size = skb->len; + np->net_ring->tx_prod = TX_RING_INC(i); + atomic_inc(&np->tx_entries); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + spin_lock_irq(&np->tx_lock); + if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) + { + np->tx_full = 1; + netif_stop_queue(dev); + np->net_ring->tx_event = + TX_RING_ADD(np->tx_idx, atomic_read(&np->tx_entries) >> 1); + } + else + { + /* Avoid unnecessary tx interrupts. */ + np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod); + } + spin_unlock_irq(&np->tx_lock); + + /* Must do this after setting tx_event: race with updates of tx_cons. 
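
The ring bookkeeping in this driver leans on the ring sizes being powers of two; a short illustrative sketch follows (the driver itself tracks the number of outstanding transmit entries in tx_entries rather than deriving it from the indices):

    /* Sketch of the index arithmetic behind TX_RING_INC()/TX_RING_ADD():
     * with a power-of-two ring, "advance and wrap" is a single mask. */
    #define RING_SIZE   256                       /* must be a power of two */
    #define RING_INC(i) (((i) + 1) & (RING_SIZE - 1))

    static unsigned int ring_used(unsigned int prod, unsigned int cons)
    {
        /* Entries the consumer has not processed yet; the mask makes the
         * subtraction correct across wrap-around. */
        return (prod - cons) & (RING_SIZE - 1);
    }
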
*/ + network_tx_buf_gc(dev); + + HYPERVISOR_net_update(); + + return 0; +} + + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned int i; + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + again: + for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) + { + if (np->net_ring->rx_ring[i].status != RING_STATUS_OK) + { + printk("bad buffer on RX ring!(%d)\n", + np->net_ring->rx_ring[i].status); + continue; + } + skb = np->rx_skb_ring[i]; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)phys_to_virt( + machine_to_phys(np->net_ring->rx_ring[i].addr)) + ) >> PAGE_SHIFT; + + skb_put(skb, np->net_ring->rx_ring[i].size); + skb->protocol = eth_type_trans(skb, dev); + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + np->stats.rx_packets++; + + np->stats.rx_bytes += np->net_ring->rx_ring[i].size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_idx = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. */ + smp_mb(); + if ( np->net_ring->rx_cons != i ) goto again; +} + + +static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + struct net_device *dev = (struct net_device *)dev_id; + network_tx_buf_gc(dev); +} + + +int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + netif_stop_queue(dev); + + free_irq(NET_RX_IRQ, dev); + free_irq(NET_TX_IRQ, dev); + + /* + * XXXX This cannot be done safely until be have a proper interface + * for setting up and tearing down virtual interfaces on the fly. + * Currently the receive buffers are locked down by Xen and we have + * no sensible way of retrieving them. 
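
The "goto again" in network_rx_int() above follows a standard lost-event avoidance pattern; stripped to its skeleton it looks as below (the helper names here are placeholders, not functions from this driver):

    /* Sketch: drain the ring, re-arm the next event, then re-check.
     * The barrier orders the re-arm before the final check; without it a
     * buffer completed between "drain" and "re-arm" could be missed and
     * never raise another interrupt. */
    for (;;) {
        drain_ring();            /* consume everything up to rx_cons        */
        rearm_event();           /* publish the index we next want to hear about */
        smp_mb();
        if (!work_pending())     /* the producer may have raced the re-arm  */
            break;
    }
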
+ */ +#if 0 + network_free_rx_buffers(dev); + kfree(np->net_ring->rx_ring); + kfree(np->net_ring->tx_ring); +#endif + + kfree(np->rx_skb_ring); + kfree(np->tx_skb_ring); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +int __init init_module(void) +{ + int i, err; + struct net_device *dev; + struct net_private *np; + + INIT_LIST_HEAD(&dev_list); + + for ( i = 0; i < start_info.num_net_rings; i++ ) + { + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->net_ring = start_info.net_rings + i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memset(dev->dev_addr, 0, ETH_ALEN); + *(unsigned int *)(dev->dev_addr + 1) = i; + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/Makefile new file mode 100644 index 0000000000..a43a615e2f --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -traditional -c $< -o $*.o + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o + +export-objs := i386_ksyms.o + +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ + ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ + i386_ksyms.o i387.o hypervisor.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S new file mode 100644 index 0000000000..0a6a5374d1 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S @@ -0,0 +1,717 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. + * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. + * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. 
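
The layout listed above matches the 2.4 struct pt_regs field for field; writing it out as the equivalent C structure makes the hex offsets easy to check:

    /* Same layout as the comment above (and as struct pt_regs in
     * include/asm-i386/ptrace.h); every field is 4 bytes on i386. */
    struct stack_frame_layout {
        long ebx;       /* 0x00 */
        long ecx;       /* 0x04 */
        long edx;       /* 0x08 */
        long esi;       /* 0x0c */
        long edi;       /* 0x10 */
        long ebp;       /* 0x14 */
        long eax;       /* 0x18 */
        int  xds;       /* 0x1c */
        int  xes;       /* 0x20 */
        long orig_eax;  /* 0x24 */
        long eip;       /* 0x28 */
        int  xcs;       /* 0x2c */
        long eflags;    /* 0x30 */
        long esp;       /* 0x34 (oldesp) */
        int  xss;       /* 0x38 (oldss)  */
    };
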
+ */ + +#include +#include +#include +#include +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +TF_MASK = 0x00000100 +IF_MASK = 0x00000200 +DF_MASK = 0x00000400 +NT_MASK = 0x00004000 + +/* + * these are offsets into the task-struct. + */ +state = 0 +flags = 4 +sigpending = 8 +addr_limit = 12 +exec_domain = 16 +need_resched = 20 +tsk_ptrace = 24 +processor = 52 + +ENOSYS = 38 + + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + +ENTRY(ret_from_fork) + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call + +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. 
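
GET_CURRENT above is the usual 2.4 trick for finding the task structure from the stack pointer; in C it amounts to:

    /* Each task's kernel stack and task_struct share one 8 kB, 8 kB-aligned
     * allocation with the task_struct at the bottom, so rounding the stack
     * pointer down to an 8 kB boundary recovers the task_struct pointer. */
    static inline struct task_struct *current_from_esp(unsigned long esp)
    {
        return (struct task_struct *)(esp & ~8191UL);
    }
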
+ */ + +ENTRY(system_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys + cmpl $(NR_syscalls),%eax + jae badsys + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +ENTRY(ret_from_sys_call) + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movl %eax,4(%esi) # need_resched and signals atomic test +ret_syscall_tests: + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + je safesti # ensure need_resched updates are seen +signal_return: + btsl %eax,4(%esi) # reenable event callbacks + movl %esp,%eax + xorl %edx,%edx + call SYMBOL_NAME(do_signal) + jmp ret_from_sys_call + + ALIGN +restore_all: + RESTORE_ALL + + ALIGN +tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracesys_exit: + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_sys_call + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) +ret_from_exception: + movb CS(%esp),%al + testl $2,%eax + jne ret_from_sys_call + jmp restore_all + + ALIGN +reschedule: + btsl %eax,4(%esi) # reenable event callbacks + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call *%edi + addl $8,%esp + jmp ret_from_exception + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + GET_CURRENT(%ebx) + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_syscall_tests +safesti:btsl %eax,4(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + cmpl %eax,(%esi) + jne 14f # process more events if necessary... + RESTORE_ALL +14: btrl %eax,4(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. 
How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00 # cmpl %eax,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00 # btrl %eax,4(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. +ENTRY(failsafe_callback) +1: pop %ds +2: pop %es +3: iret +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp error_code + +ENTRY(device_not_available) + pushl $-1 # mark this as an int + SAVE_ALL + GET_CURRENT(%ebx) + call SYMBOL_NAME(math_state_restore) + jmp ret_from_exception + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(double_fault) + pushl $ SYMBOL_NAME(do_double_fault) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +# This handler is special, because it gets an extra value on its stack, +# which is the linear faulting address. 
+ENTRY(page_fault) + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the faulting address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %edi # push the faulting address + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call SYMBOL_NAME(do_page_fault) + addl $12,%esp + jmp ret_from_exception + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +.data +ENTRY(sys_call_table) + .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ + .long SYMBOL_NAME(sys_exit) + .long SYMBOL_NAME(sys_fork) + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ + .long SYMBOL_NAME(sys_close) + .long SYMBOL_NAME(sys_waitpid) + .long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) + .long SYMBOL_NAME(sys_unlink) /* 10 */ + .long SYMBOL_NAME(sys_execve) + .long SYMBOL_NAME(sys_chdir) + .long SYMBOL_NAME(sys_time) + .long SYMBOL_NAME(sys_mknod) + .long SYMBOL_NAME(sys_chmod) /* 15 */ + .long SYMBOL_NAME(sys_lchown16) + .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ + .long SYMBOL_NAME(sys_stat) + .long SYMBOL_NAME(sys_lseek) + .long SYMBOL_NAME(sys_getpid) /* 20 */ + .long SYMBOL_NAME(sys_mount) + .long SYMBOL_NAME(sys_oldumount) + .long SYMBOL_NAME(sys_setuid16) + .long SYMBOL_NAME(sys_getuid16) + .long SYMBOL_NAME(sys_stime) /* 25 */ + .long SYMBOL_NAME(sys_ptrace) + .long SYMBOL_NAME(sys_alarm) + .long SYMBOL_NAME(sys_fstat) + .long SYMBOL_NAME(sys_pause) + .long SYMBOL_NAME(sys_utime) /* 30 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ + .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ + .long SYMBOL_NAME(sys_access) + .long SYMBOL_NAME(sys_nice) + .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ + .long SYMBOL_NAME(sys_sync) + .long SYMBOL_NAME(sys_kill) + .long SYMBOL_NAME(sys_rename) + .long SYMBOL_NAME(sys_mkdir) + .long SYMBOL_NAME(sys_rmdir) /* 40 */ + .long SYMBOL_NAME(sys_dup) + .long SYMBOL_NAME(sys_pipe) + .long SYMBOL_NAME(sys_times) + .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ + .long SYMBOL_NAME(sys_brk) /* 45 */ + .long SYMBOL_NAME(sys_setgid16) + .long SYMBOL_NAME(sys_getgid16) + .long SYMBOL_NAME(sys_signal) + .long SYMBOL_NAME(sys_geteuid16) + .long SYMBOL_NAME(sys_getegid16) /* 50 */ + .long SYMBOL_NAME(sys_acct) + .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ + .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ + .long SYMBOL_NAME(sys_ioctl) + .long SYMBOL_NAME(sys_fcntl) /* 55 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ + .long SYMBOL_NAME(sys_setpgid) + .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ + .long SYMBOL_NAME(sys_olduname) + .long SYMBOL_NAME(sys_umask) /* 60 */ + .long SYMBOL_NAME(sys_chroot) + .long SYMBOL_NAME(sys_ustat) + .long SYMBOL_NAME(sys_dup2) + .long SYMBOL_NAME(sys_getppid) + .long SYMBOL_NAME(sys_getpgrp) /* 65 */ + .long SYMBOL_NAME(sys_setsid) + .long SYMBOL_NAME(sys_sigaction) + .long SYMBOL_NAME(sys_sgetmask) + .long SYMBOL_NAME(sys_ssetmask) + .long 
SYMBOL_NAME(sys_setreuid16) /* 70 */ + .long SYMBOL_NAME(sys_setregid16) + .long SYMBOL_NAME(sys_sigsuspend) + .long SYMBOL_NAME(sys_sigpending) + .long SYMBOL_NAME(sys_sethostname) + .long SYMBOL_NAME(sys_setrlimit) /* 75 */ + .long SYMBOL_NAME(sys_old_getrlimit) + .long SYMBOL_NAME(sys_getrusage) + .long SYMBOL_NAME(sys_gettimeofday) + .long SYMBOL_NAME(sys_settimeofday) + .long SYMBOL_NAME(sys_getgroups16) /* 80 */ + .long SYMBOL_NAME(sys_setgroups16) + .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_symlink) + .long SYMBOL_NAME(sys_lstat) + .long SYMBOL_NAME(sys_readlink) /* 85 */ + .long SYMBOL_NAME(sys_uselib) + .long SYMBOL_NAME(sys_swapon) + .long SYMBOL_NAME(sys_reboot) + .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_munmap) + .long SYMBOL_NAME(sys_truncate) + .long SYMBOL_NAME(sys_ftruncate) + .long SYMBOL_NAME(sys_fchmod) + .long SYMBOL_NAME(sys_fchown16) /* 95 */ + .long SYMBOL_NAME(sys_getpriority) + .long SYMBOL_NAME(sys_setpriority) + .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ + .long SYMBOL_NAME(sys_statfs) + .long SYMBOL_NAME(sys_fstatfs) /* 100 */ + .long SYMBOL_NAME(sys_ioperm) + .long SYMBOL_NAME(sys_socketcall) + .long SYMBOL_NAME(sys_syslog) + .long SYMBOL_NAME(sys_setitimer) + .long SYMBOL_NAME(sys_getitimer) /* 105 */ + .long SYMBOL_NAME(sys_newstat) + .long SYMBOL_NAME(sys_newlstat) + .long SYMBOL_NAME(sys_newfstat) + .long SYMBOL_NAME(sys_uname) + .long SYMBOL_NAME(sys_iopl) /* 110 */ + .long SYMBOL_NAME(sys_vhangup) + .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_wait4) + .long SYMBOL_NAME(sys_swapoff) /* 115 */ + .long SYMBOL_NAME(sys_sysinfo) + .long SYMBOL_NAME(sys_ipc) + .long SYMBOL_NAME(sys_fsync) + .long SYMBOL_NAME(sys_sigreturn) + .long SYMBOL_NAME(sys_clone) /* 120 */ + .long SYMBOL_NAME(sys_setdomainname) + .long SYMBOL_NAME(sys_newuname) + .long SYMBOL_NAME(sys_modify_ldt) + .long SYMBOL_NAME(sys_adjtimex) + .long SYMBOL_NAME(sys_mprotect) /* 125 */ + .long SYMBOL_NAME(sys_sigprocmask) + .long SYMBOL_NAME(sys_create_module) + .long SYMBOL_NAME(sys_init_module) + .long SYMBOL_NAME(sys_delete_module) + .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ + .long SYMBOL_NAME(sys_quotactl) + .long SYMBOL_NAME(sys_getpgid) + .long SYMBOL_NAME(sys_fchdir) + .long SYMBOL_NAME(sys_bdflush) + .long SYMBOL_NAME(sys_sysfs) /* 135 */ + .long SYMBOL_NAME(sys_personality) + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ + .long SYMBOL_NAME(sys_setfsuid16) + .long SYMBOL_NAME(sys_setfsgid16) + .long SYMBOL_NAME(sys_llseek) /* 140 */ + .long SYMBOL_NAME(sys_getdents) + .long SYMBOL_NAME(sys_select) + .long SYMBOL_NAME(sys_flock) + .long SYMBOL_NAME(sys_msync) + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .long SYMBOL_NAME(sys_getsid) + .long SYMBOL_NAME(sys_fdatasync) + .long SYMBOL_NAME(sys_sysctl) + .long SYMBOL_NAME(sys_mlock) /* 150 */ + .long SYMBOL_NAME(sys_munlock) + .long SYMBOL_NAME(sys_mlockall) + .long SYMBOL_NAME(sys_munlockall) + .long SYMBOL_NAME(sys_sched_setparam) + .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ + .long SYMBOL_NAME(sys_sched_setscheduler) + .long SYMBOL_NAME(sys_sched_getscheduler) + .long SYMBOL_NAME(sys_sched_yield) + .long SYMBOL_NAME(sys_sched_get_priority_max) + .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ + .long SYMBOL_NAME(sys_sched_rr_get_interval) + .long SYMBOL_NAME(sys_nanosleep) + .long SYMBOL_NAME(sys_mremap) 
+ .long SYMBOL_NAME(sys_setresuid16) + .long SYMBOL_NAME(sys_getresuid16) /* 165 */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_query_module) + .long SYMBOL_NAME(sys_poll) + .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_setresgid16) /* 170 */ + .long SYMBOL_NAME(sys_getresgid16) + .long SYMBOL_NAME(sys_prctl) + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .long SYMBOL_NAME(sys_pread) /* 180 */ + .long SYMBOL_NAME(sys_pwrite) + .long SYMBOL_NAME(sys_chown16) + .long SYMBOL_NAME(sys_getcwd) + .long SYMBOL_NAME(sys_capget) + .long SYMBOL_NAME(sys_capset) /* 185 */ + .long SYMBOL_NAME(sys_sigaltstack) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ + .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ + .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_getrlimit) + .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + .long SYMBOL_NAME(sys_stat64) /* 195 */ + .long SYMBOL_NAME(sys_lstat64) + .long SYMBOL_NAME(sys_fstat64) + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_getgid) /* 200 */ + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) + .long SYMBOL_NAME(sys_setreuid) + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_getgroups) /* 205 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_fchown) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) + .long SYMBOL_NAME(sys_setresgid) /* 210 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_setfsuid) /* 215 */ + .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_pivot_root) + .long SYMBOL_NAME(sys_mincore) + .long SYMBOL_NAME(sys_madvise) + .long SYMBOL_NAME(sys_getdents64) /* 220 */ + .long SYMBOL_NAME(sys_fcntl64) + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ + .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ + .long SYMBOL_NAME(sys_gettid) + .long SYMBOL_NAME(sys_readahead) /* 225 */ + .long SYMBOL_NAME(sys_setxattr) + .long SYMBOL_NAME(sys_lsetxattr) + .long SYMBOL_NAME(sys_fsetxattr) + .long SYMBOL_NAME(sys_getxattr) + .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ + .long SYMBOL_NAME(sys_fgetxattr) + .long SYMBOL_NAME(sys_listxattr) + .long SYMBOL_NAME(sys_llistxattr) + .long SYMBOL_NAME(sys_flistxattr) + .long SYMBOL_NAME(sys_removexattr) /* 235 */ + .long SYMBOL_NAME(sys_lremovexattr) + .long SYMBOL_NAME(sys_fremovexattr) + .long SYMBOL_NAME(sys_tkill) + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sendfile64 */ + .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ + .long 
SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ + + .rept NR_syscalls-(.-sys_call_table)/4 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S new file mode 100644 index 0000000000..86a82b13dc --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S @@ -0,0 +1,67 @@ + +.text +#include +#include +#include +#include +#include +#include +#include + +/* Offsets in start_info structure */ +#define SHARED_INFO 4 +#define MOD_START 12 +#define MOD_LEN 16 + +startup_32: + cld + + lss stack_start,%esp + + /* Copy initrd somewhere safe before it's clobbered by BSS. */ + mov MOD_LEN(%esi),%ecx + shr $2,%ecx + jz 2f /* bail from copy loop if no initrd */ + mov $SYMBOL_NAME(_end),%edi + add MOD_LEN(%esi),%edi + mov MOD_START(%esi),%eax + add MOD_LEN(%esi),%eax +1: sub $4,%eax + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b + mov %edi,MOD_START(%esi) + + /* Clear BSS first so that there are no surprises... */ +2: xorl %eax,%eax + movl $SYMBOL_NAME(__bss_start),%edi + movl $SYMBOL_NAME(_end),%ecx + subl %edi,%ecx + rep stosb + + /* Copy the necessary stuff from start_info structure. */ + /* We need to copy shared_info early, so that sti/cli work */ + mov SHARED_INFO(%esi),%eax + mov %eax,SYMBOL_NAME(HYPERVISOR_shared_info) + mov $SYMBOL_NAME(start_info_union),%edi + mov $128,%ecx + rep movsl + + jmp SYMBOL_NAME(start_kernel) + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS + +.org 0x1000 +ENTRY(empty_zero_page) + +.org 0x2000 +ENTRY(cpu0_pte_quicklist) + +.org 0x2400 +ENTRY(cpu0_pgd_quicklist) + +.org 0x2800 +ENTRY(stext) +ENTRY(_stext) diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c new file mode 100644 index 0000000000..c49087173f --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include +#include + +static unsigned long event_mask = 0; + +void frobb(void) {} + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long events, flags; + shared_info_t *shared = HYPERVISOR_shared_info; + + do { + /* Specialised local_irq_save(). */ + flags = shared->events_enable; + shared->events_enable = 0; + barrier(); + + events = xchg(&shared->events, 0); + events &= event_mask; + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 2f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "2: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (events), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* Specialised local_irq_restore(). 
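
The inline assembly above is only a dispatch loop over the pending event bits; in rough C it would read as follows (handle_event() is a placeholder for the do_IRQ call made from the assembly):

    /* Sketch of the dispatch done by the asm block above: find the lowest
     * set bit (bsfl), clear it (btrl), and hand it to the IRQ layer, until
     * no masked-in events remain. */
    while (events != 0) {
        int ev = ffs(events) - 1;      /* lowest pending event number */
        events &= ~(1UL << ev);
        handle_event(ev, regs);        /* stands in for the do_IRQ call */
    }
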
*/ + shared->events_enable = flags; + barrier(); + } + while ( shared->events ); +} + + + +/* + * Define interface to generic handling in irq.c + */ + +static unsigned int startup_hypervisor_event(unsigned int irq) +{ + set_bit(irq, &event_mask); + return 0; +} + +static void shutdown_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); +} + +static void enable_hypervisor_event(unsigned int irq) +{ + set_bit(irq, &event_mask); +} + +static void disable_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); +} + +static void ack_hypervisor_event(unsigned int irq) +{ + if ( !(event_mask & (1< +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//XXX ??? #include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void dump_thread(struct pt_regs *, struct user *); +extern spinlock_t rtc_lock; + +#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) +extern void machine_real_restart(unsigned char *, int); +EXPORT_SYMBOL(machine_real_restart); +extern void default_idle(void); +EXPORT_SYMBOL(default_idle); +#endif + +#ifdef CONFIG_SMP +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; +EXPORT_SYMBOL(drive_info); +#endif + +// XXX extern unsigned long get_cmos_time(void); + +/* platform dependent support */ +EXPORT_SYMBOL(boot_cpu_data); +EXPORT_SYMBOL(__verify_write); +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(dump_extended_fpu); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(disable_irq_nosync); +EXPORT_SYMBOL(probe_irq_mask); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(pm_idle); +EXPORT_SYMBOL(pm_power_off); +EXPORT_SYMBOL(apm_info); +//EXPORT_SYMBOL(gdt); +EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_DEBUG_IOVIRT +EXPORT_SYMBOL(__io_virt_debug); +#endif + +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); +EXPORT_SYMBOL_NOVERS(__down_failed_trylock); +EXPORT_SYMBOL_NOVERS(__up_wakeup); +/* Networking helper routines. 
*/ +EXPORT_SYMBOL(csum_partial_copy_generic); +/* Delay loops */ +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); + +EXPORT_SYMBOL_NOVERS(__get_user_1); +EXPORT_SYMBOL_NOVERS(__get_user_2); +EXPORT_SYMBOL_NOVERS(__get_user_4); + +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); + +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__generic_copy_from_user); +EXPORT_SYMBOL(__generic_copy_to_user); +EXPORT_SYMBOL(strnlen_user); + +#ifdef CONFIG_X86_USE_3DNOW +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(cpu_data); +EXPORT_SYMBOL(kernel_flag_cacheline); +EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL_NOVERS(__write_lock_failed); +EXPORT_SYMBOL_NOVERS(__read_lock_failed); + +/* Global SMP irq stuff */ +EXPORT_SYMBOL(synchronize_irq); +EXPORT_SYMBOL(global_irq_holder); +EXPORT_SYMBOL(__global_cli); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_save_flags); +EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_call_function); + +/* TLB flushing */ +EXPORT_SYMBOL(flush_tlb_page); +#endif + +#ifdef CONFIG_X86_IO_APIC +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +#endif + +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(get_wchan); + +EXPORT_SYMBOL(rtc_lock); + +#undef memcpy +#undef memset +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +EXPORT_SYMBOL_NOVERS(memcpy); +EXPORT_SYMBOL_NOVERS(memset); + +#ifdef CONFIG_HAVE_DEC_LOCK +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif + +#ifdef CONFIG_MULTIQUAD +EXPORT_SYMBOL(xquad_portio); +#endif diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/i387.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/i387.c new file mode 100644 index 0000000000..d7d7fdd05d --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/i387.c @@ -0,0 +1,542 @@ +/* + * linux/arch/i386/kernel/i387.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HAVE_HWFP 1 + +static union i387_union empty_fpu_state; + +void __init boot_init_fpu(void) +{ + memset(&empty_fpu_state, 0, sizeof(union i387_union)); + + if (!cpu_has_fxsr) { + empty_fpu_state.fsave.cwd = 0xffff037f; + empty_fpu_state.fsave.swd = 0xffff0000; + empty_fpu_state.fsave.twd = 0xffffffff; + empty_fpu_state.fsave.fos = 0xffff0000; + } else { + empty_fpu_state.fxsave.cwd = 0x37f; + if (cpu_has_xmm) + empty_fpu_state.fxsave.mxcsr = 0x1f80; + } +} + +void load_empty_fpu(struct task_struct * tsk) +{ + memcpy(&tsk->thread.i387, &empty_fpu_state, sizeof(union i387_union)); +} + +/* + * The _current_ task is using the FPU for the first time + * so initialize it and set the mxcsr to its default + * value at reset if we support XMM instructions and then + * remeber the current task has used the FPU. + */ +void init_fpu(void) +{ + if (cpu_has_fxsr) + asm volatile("fxrstor %0" : : "m" (empty_fpu_state.fxsave)); + else + __asm__("fninit"); + current->used_math = 1; +} + +/* + * FPU lazy state save handling. 
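
The policy implemented by the helpers below is the stock i386 lazy-FPU scheme: FPU state is not saved eagerly on every context switch; instead TS is set so the next FPU instruction traps to device_not_available, which reloads the correct state through math_state_restore(). The switch-out side, as a sketch using the PF_USEDFPU bookkeeping seen in this file:

    /* Sketch: only a task that actually touched the FPU during its last
     * run pays for a save; everyone else just leaves TS set. */
    if (prev->flags & PF_USEDFPU)
        save_init_fpu(prev);       /* fxsave or fnsave, then stts() */
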
+ */ + +static inline void __save_init_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxsave %0 ; fnclex" + : "=m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "fnsave %0 ; fwait" + : "=m" (tsk->thread.i387.fsave) ); + } + tsk->flags &= ~PF_USEDFPU; +} + +void save_init_fpu( struct task_struct *tsk ) +{ + __save_init_fpu(tsk); + stts(); +} + +void kernel_fpu_begin(void) +{ + struct task_struct *tsk = current; + + if (tsk->flags & PF_USEDFPU) { + __save_init_fpu(tsk); + return; + } + clts(); +} + +void restore_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxrstor %0" + : : "m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "frstor %0" + : : "m" (tsk->thread.i387.fsave) ); + } +} + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); + + for ( i = 0 ; i < 8 ; i++ ) { + if ( twd & 0x1 ) { + st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); + + switch ( st->exponent & 0x7fff ) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if ( st->significand[3] & 0x8000 ) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* + * FPU state interaction. 
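
A concrete check of the tag-word conversion above: the legacy format keeps two bits per register (00 valid, 01 zero, 10 special, 11 empty), while the FXSR format keeps a single bit per register that is set whenever the register is in use.

    /* Worked examples for twd_i387_to_fxsr():
     *   twd = 0xfffc   st(0) valid, st(1)..st(7) empty        -> 0x01
     *   twd = 0xfff9   st(0) zero, st(1) special, rest empty  -> 0x03
     * Every non-empty two-bit tag collapses to a 1 bit; the shifts merely
     * pack those bits down into the low byte. */
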
+ */ + +unsigned short get_fpu_cwd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.cwd; + } else { + return (unsigned short)tsk->thread.i387.fsave.cwd; + } +} + +unsigned short get_fpu_swd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.swd; + } else { + return (unsigned short)tsk->thread.i387.fsave.swd; + } +} + +unsigned short get_fpu_twd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.twd; + } else { + return (unsigned short)tsk->thread.i387.fsave.twd; + } +} + +unsigned short get_fpu_mxcsr( struct task_struct *tsk ) +{ + if ( cpu_has_xmm ) { + return tsk->thread.i387.fxsave.mxcsr; + } else { + return 0x1f80; + } +} + +void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.cwd = cwd; + } else { + tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000); + } +} + +void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.swd = swd; + } else { + tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000); + } +} + +void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); + } else { + tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000); + } +} + +void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) +{ + if ( cpu_has_xmm ) { + tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf); + } +} + +/* + * FXSR floating point environment conversions. + */ + +static inline int convert_fxsr_to_user( struct _fpstate *buf, + struct i387_fxsave_struct *fxsave ) +{ + unsigned long env[7]; + struct _fpreg *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000; + env[1] = (unsigned long)fxsave->swd | 0xffff0000; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_to_user( to, from, sizeof(*to) ) ) + return 1; + } + return 0; +} + +static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate *buf ) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg *from; + int i; + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_from_user( to, from, sizeof(*from) ) ) + return 1; + } + return 0; +} + +/* + * Signal frame handlers. 
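
The seven-longword environment built by convert_fxsr_to_user() above is the legacy fsave header as user space sees it in struct _fpstate (asm/sigcontext.h); the mapping is:

    /* env[] index -> struct _fpstate field filled in above:
     *   env[0]  cw       control word (upper 16 bits forced to ones)
     *   env[1]  sw       status word  (upper 16 bits forced to ones)
     *   env[2]  tag      full 2-bit-per-register tag word, rebuilt from FXSR
     *   env[3]  ipoff    FPU instruction pointer offset (fip)
     *   env[4]  cssel    instruction pointer selector | opcode << 16
     *   env[5]  dataoff  operand pointer offset (foo)
     *   env[6]  datasel  operand pointer selector (fos)                   */
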
+ */
+
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+ struct task_struct *tsk = current;
+
+ unlazy_fpu( tsk );
+ tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+ if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+ sizeof(struct i387_fsave_struct) ) )
+ return -1;
+ return 1;
+}
+
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ unlazy_fpu( tsk );
+
+ if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+ return -1;
+
+ err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+ err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+ if ( err )
+ return -1;
+
+ if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+ sizeof(struct i387_fxsave_struct) ) )
+ return -1;
+ return 1;
+}
+
+int save_i387( struct _fpstate *buf )
+{
+ if ( !current->used_math )
+ return 0;
+
+ /* This will cause a "finit" to be triggered by the next
+ * attempted FPU operation by the 'current' process.
+ */
+ current->used_math = 0;
+
+ if ( HAVE_HWFP ) {
+ if ( cpu_has_fxsr ) {
+ return save_i387_fxsave( buf );
+ } else {
+ return save_i387_fsave( buf );
+ }
+ } else {
+ return save_i387_soft( &current->thread.i387.soft, buf );
+ }
+}
+
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+ struct task_struct *tsk = current;
+ clear_fpu( tsk );
+ return __copy_from_user( &tsk->thread.i387.fsave, buf,
+ sizeof(struct i387_fsave_struct) );
+}
+
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+ struct task_struct *tsk = current;
+ clear_fpu( tsk );
+ if ( __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+ sizeof(struct i387_fxsave_struct) ) )
+ return 1;
+ /* mxcsr bit 6 and 31-16 must be zero for security reasons */
+ tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+ return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate *buf )
+{
+ int err;
+
+ if ( HAVE_HWFP ) {
+ if ( cpu_has_fxsr ) {
+ err = restore_i387_fxsave( buf );
+ } else {
+ err = restore_i387_fsave( buf );
+ }
+ } else {
+ err = restore_i387_soft( &current->thread.i387.soft, buf );
+ }
+ current->used_math = 1;
+ return err;
+}
+
+/*
+ * ptrace request handlers.
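save_i387() above is what fills in the _fpstate block of a signal frame. A user process can inspect that block through the ucontext pointer passed to an SA_SIGINFO handler; the sketch below assumes glibc's i386 layout (uc_mcontext.fpregs pointing at a structure with cw/sw fields), which is an assumption of this example rather than anything guaranteed by the patch.

/* Illustrative only: peek at the FPU control/status words the kernel saved
 * into the signal frame via save_i387(). */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>

static void show_fpu(int sig, siginfo_t *si, void *uctx)
{
    ucontext_t *uc = uctx;
    if (uc->uc_mcontext.fpregs)            /* NULL when no FPU state was saved */
        printf("cw=%04lx sw=%04lx\n",
               uc->uc_mcontext.fpregs->cw & 0xffff,
               uc->uc_mcontext.fpregs->sw & 0xffff);
}

int main(void)
{
    struct sigaction sa;
    volatile double d = 1.0;
    d = d / 3.0;                           /* touch the FPU so used_math is set */

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = show_fpu;
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGUSR1, &sa, NULL);
    raise(SIGUSR1);                        /* kernel builds the frame, calling save_i387() */
    return 0;
}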
+ */ + +static inline int get_fpregs_fsave( struct user_i387_struct *buf, + struct task_struct *tsk ) +{ + return __copy_to_user( buf, &tsk->thread.i387.fsave, + sizeof(struct user_i387_struct) ); +} + +static inline int get_fpregs_fxsave( struct user_i387_struct *buf, + struct task_struct *tsk ) +{ + return convert_fxsr_to_user( (struct _fpstate *)buf, + &tsk->thread.i387.fxsave ); +} + +int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk ) +{ + if ( HAVE_HWFP ) { + if ( cpu_has_fxsr ) { + return get_fpregs_fxsave( buf, tsk ); + } else { + return get_fpregs_fsave( buf, tsk ); + } + } else { + return save_i387_soft( &tsk->thread.i387.soft, + (struct _fpstate *)buf ); + } +} + +static inline int set_fpregs_fsave( struct task_struct *tsk, + struct user_i387_struct *buf ) +{ + return __copy_from_user( &tsk->thread.i387.fsave, buf, + sizeof(struct user_i387_struct) ); +} + +static inline int set_fpregs_fxsave( struct task_struct *tsk, + struct user_i387_struct *buf ) +{ + return convert_fxsr_from_user( &tsk->thread.i387.fxsave, + (struct _fpstate *)buf ); +} + +int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf ) +{ + if ( HAVE_HWFP ) { + if ( cpu_has_fxsr ) { + return set_fpregs_fxsave( tsk, buf ); + } else { + return set_fpregs_fsave( tsk, buf ); + } + } else { + return restore_i387_soft( &tsk->thread.i387.soft, + (struct _fpstate *)buf ); + } +} + +int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave, + sizeof(struct user_fxsr_struct) )) + return -EFAULT; + return 0; + } else { + return -EIO; + } +} + +int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf ) +{ + if ( cpu_has_fxsr ) { + __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf, + sizeof(struct user_fxsr_struct) ); + /* mxcsr bit 6 and 31-16 must be zero for security reasons */ + tsk->thread.i387.fxsave.mxcsr &= 0xffbf; + return 0; + } else { + return -EIO; + } +} + +/* + * FPU state for core dumps. 
+ */ + +static inline void copy_fpu_fsave( struct task_struct *tsk, + struct user_i387_struct *fpu ) +{ + memcpy( fpu, &tsk->thread.i387.fsave, + sizeof(struct user_i387_struct) ); +} + +static inline void copy_fpu_fxsave( struct task_struct *tsk, + struct user_i387_struct *fpu ) +{ + unsigned short *to; + unsigned short *from; + int i; + + memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); + + to = (unsigned short *)&fpu->st_space[0]; + from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; + for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { + memcpy( to, from, 5 * sizeof(unsigned short) ); + } +} + +int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) +{ + int fpvalid; + struct task_struct *tsk = current; + + fpvalid = tsk->used_math; + if ( fpvalid ) { + unlazy_fpu( tsk ); + if ( cpu_has_fxsr ) { + copy_fpu_fxsave( tsk, fpu ); + } else { + copy_fpu_fsave( tsk, fpu ); + } + } + + return fpvalid; +} + +int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu ) +{ + int fpvalid; + struct task_struct *tsk = current; + + fpvalid = tsk->used_math && cpu_has_fxsr; + if ( fpvalid ) { + unlazy_fpu( tsk ); + memcpy( fpu, &tsk->thread.i387.fxsave, + sizeof(struct user_fxsr_struct) ); + } + + return fpvalid; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/init_task.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/init_task.c new file mode 100644 index 0000000000..7779809ef2 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/init_task.c @@ -0,0 +1,33 @@ +#include +#include +#include + +#include +#include +#include + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is 8192-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union task_union init_task_union + __attribute__((__section__(".data.init_task"))) = + { INIT_TASK(init_task_union.task) }; + +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ioport.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ioport.c new file mode 100644 index 0000000000..798c5c5c1f --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ioport.c @@ -0,0 +1,19 @@ +#include +#include +#include +#include +#include + + +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + /* No IO permission! */ + return -EPERM; +} + + +asmlinkage int sys_iopl(unsigned long unused) +{ + /* The hypervisor won't allow it! 
*/ + return -EPERM; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/irq.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/irq.c new file mode 100644 index 0000000000..4f449691f0 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/irq.c @@ -0,0 +1,1136 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); + +/* + * Special irq handlers. 
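Since both sys_ioperm() and sys_iopl() above unconditionally return -EPERM under this port, a guest user process can observe the restriction directly. A minimal user-space sketch follows; the parallel-port range 0x378 is just an arbitrary example.

/* Illustrative only: port I/O permission requests always fail on XenoLinux. */
#include <errno.h>
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
    if (ioperm(0x378, 3, 1) < 0)
        perror("ioperm");                  /* expected: Operation not permitted */
    if (iopl(3) < 0)
        perror("iopl");                    /* expected: Operation not permitted */
    return 0;
}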
+ */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +int get_irq_list(char *buf) +{ + int i, j; + struct irqaction * action; + char *p = buf; + + p += sprintf(p, " "); + for (j=0; jtypename); + p += sprintf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + p += sprintf(p, ", %s", action->name); + *p++ = '\n'; + } + p += sprintf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + nmi_count(cpu_logical_map(j))); + p += sprintf(p, "\n"); +#if CONFIG_X86_LOCAL_APIC + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + p += sprintf(p, "\n"); +#endif + p += sprintf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG + p += sprintf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif +#endif + return p - buf; +} + + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = NO_PROC_ID; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +extern void show_stack(unsigned long* esp); + +static void show(char * str) +{ + int i; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [",irqs_running()); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_irq_count(i)); + printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_bh_count(i)); + + printk(" ]\nStack dumps:"); + for(i = 0; i < smp_num_cpus; i++) { + unsigned long esp; + if (i == cpu) + continue; + printk("\nCPU %d:",i); + esp = init_tss[i].esp0; + if (!esp) { + /* tss->esp0 is set to NULL in cpu_init(), + * it's initialized when the cpu returns to user + * space. -- manfreds + */ + printk(" "); + continue; + } + esp &= ~(THREAD_SIZE-1); + esp += sizeof(struct task_struct); + show_stack((void*)esp); + } + printk("\nCPU %d:",cpu); + show_stack(NULL); + printk("\n"); +} + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. 
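no_irq_type above is the placeholder instance of the controller template described in the file's header comment. For orientation, this is roughly what a real controller descriptor looks like, using the same positional field order (typename, startup, shutdown, enable, disable, ack, end); the xen_* names and the empty bodies are hypothetical and are not taken from this patch.

/* Sketch of a controller-template instance; callback names are invented. */
static unsigned int xen_startup_irq(unsigned int irq) { /* unmask at the source */ return 0; }
static void xen_shutdown_irq(unsigned int irq) { /* mask at the source */ }
static void xen_enable_irq(unsigned int irq)   { }
static void xen_disable_irq(unsigned int irq)  { }
static void xen_ack_irq(unsigned int irq)      { /* acknowledge delivery */ }
static void xen_end_irq(unsigned int irq)      { /* re-enable unless IRQ_DISABLED */ }

static struct hw_interrupt_type xen_irq_type = {
    "XenoLinux-virq",
    xen_startup_irq,
    xen_shutdown_irq,
    xen_enable_irq,
    xen_disable_irq,
    xen_ack_irq,
    xen_end_irq
};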
Set this define to 0 if you + * want to check whether your system freezes. I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +void __global_cli(void) +{ + panic("__global_cli"); +} + +void __global_sti(void) +{ + panic("__global_sti"); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + panic("__global_save_flags"); +} + +void __global_restore_flags(unsigned long flags) +{ + panic("__global_restore_flags"); +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. + */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. 
+ */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; +#ifdef CONFIG_DEBUG_STACKOVERFLOW + long esp; + + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + + printk("do_IRQ: stack overflow: %ld\n", + esp - sizeof(struct task_struct)); + __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); + show_stack((void *)esp); + } +#endif + + kstat.irqs[cpu][irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + spin_unlock(&desc->lock); + handle_IRQ_event(irq, regs, action); + spin_lock(&desc->lock); + + if (!(desc->status & IRQ_PENDING)) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + + if (softirq_pending(cpu)) + do_softirq(); + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. 
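The nesting rule documented for disable_irq()/enable_irq() earlier in this file is easiest to see from the caller's side. A hypothetical driver fragment (the mydev structure and its irq field are invented for the example):

/* Sketch only: each disable_irq() must be balanced by one enable_irq(). */
static void mydev_reconfigure(struct mydev *dev)
{
    disable_irq(dev->irq);      /* waits for any running handler to finish */
    disable_irq(dev->irq);      /* nested call: depth is now 2             */

    /* ... safely touch state shared with the interrupt handler ... */

    enable_irq(dev->irq);       /* depth back to 1: line still masked      */
    enable_irq(dev->irq);       /* depth 0: interrupt delivery resumes     */
}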
+ * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. + */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static DECLARE_MUTEX(probe_sem); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long delay; + + down(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. 
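request_irq() and free_irq() above are the driver-facing entry points. A sketch of typical usage follows, passing a non-NULL dev_id so the shared-line bookkeeping works; the mydev structure and function names are illustrative only.

/* Sketch of a 2.4-style driver registering and releasing a shared line. */
static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    struct mydev *dev = dev_id;     /* the cookie passed to request_irq() */
    /* ... acknowledge the device, queue bottom-half work ... */
}

static int mydev_open(struct mydev *dev)
{
    int err = request_irq(dev->irq, mydev_interrupt,
                          SA_SHIRQ | SA_SAMPLE_RANDOM, "mydev", dev);
    if (err)
        return err;                 /* -EINVAL, -EBUSY or -ENOMEM as above */
    return 0;
}

static void mydev_close(struct mydev *dev)
{
    free_irq(dev->irq, dev);        /* dev_id must match the one registered */
}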
+ */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger + */ + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ synchronize_irq(); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +/* + * Return a mask of triggered interrupts (this + * can handle only legacy ISA interrupts). + */ + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. + */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (i < 16 && !(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + return mask & val; +} + +/* + * Return the one interrupt that triggered (this can + * handle any interrupt source). + */ + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldnt happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. 
+ */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); + return 0; +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; + +#define HEX_DIGITS 8 + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +#if CONFIG_SMP + +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static unsigned long irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = ~0UL }; +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", irq_affinity[(long)data]); +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int irq = (long) data, full_count = count, err; + unsigned long new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = parse_hex_value(buffer, count, &new_value); + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!(new_value & cpu_online_map)) + return -EINVAL; + + irq_affinity[irq] = new_value; + irq_desc[irq].handler->set_affinity(irq, new_value); + + return full_count; +} + +#endif + +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long *mask = (unsigned long *) data; + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", *mask); +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned long *mask = (unsigned long *) data, full_count = count, err; + unsigned long new_value; + + err = parse_hex_value(buffer, count, &new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || + irq_dir[irq]) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + +#if CONFIG_SMP + { + struct proc_dir_entry *entry; + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); + + if (entry) { + entry->nlink = 1; + entry->data = (void *)(long)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = irq_affinity_write_proc; + } + + smp_affinity_entry[irq] = entry; + } +#endif +} + +unsigned long prof_cpu_mask = -1; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); + + if (!entry) + return; + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. + */ + for (i = 0; i < NR_IRQS; i++) + register_irq_proc(i); +} + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c new file mode 100644 index 0000000000..6c93943036 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c @@ -0,0 +1,26 @@ +/* + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + */ + +#include +#include +#include +#include + +/* + * XXX KAF (28/7/02): This stuff is only used for DOS emulation, and is + * the default way of finding current TCB in linuxthreads. Supporting + * table update svia the hypervisor is feasible, but a hassle: for now, + * recompiling linuxthreads is the most sensible option. 
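The smp_affinity write handler above accepts an eight-digit hex CPU mask and rejects any mask that targets no online CPU. A small user-space sketch that pins a hypothetical IRQ 7 to CPU 0 through the /proc/irq entry created by register_irq_proc():

/* Illustrative only: write a hex mask to /proc/irq/<n>/smp_affinity. */
#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/proc/irq/7/smp_affinity", "w");
    if (!f) { perror("fopen"); return 1; }
    fprintf(f, "00000001\n");       /* one bit per CPU; an all-zero mask is refused */
    fclose(f);
    return 0;
}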
+ * + * Oh, this may become an issue depending on what JVM we use for + * running the xeno-daemon. + */ + +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + return -ENOSYS; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c new file mode 100644 index 0000000000..a4e4cd2497 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c @@ -0,0 +1,453 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#define __KERNEL_SYSCALLS__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +int hlt_counter; + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle (void) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->nice = 20; + current->counter = -100; + + while (1) { + while (!current->need_resched) + HYPERVISOR_do_sched_op(NULL); + schedule(); + check_pgt_cache(); + } +} + +void machine_restart(char * __unused) +{ + HYPERVISOR_exit(); +} + +void machine_halt(void) +{ + HYPERVISOR_exit(); +} + +void machine_power_off(void) +{ + HYPERVISOR_exit(); +} + +extern void show_trace(unsigned long* esp); + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); + if (regs->xcs & 2) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes); + + show_trace(®s->esp); +} + +/* + * No need to lock the MM as we are the last user + */ +void release_segments(struct mm_struct *mm) +{ + void * ldt = mm->context.segments; + + /* + * free the LDT + */ + if (ldt) { + mm->context.segments = NULL; + clear_LDT(); + vfree(ldt); + } +} + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? */ + "je 1f\n\t" /* parent - jump */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. 
*/ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | CLONE_VM) + : "memory"); + + return retval; +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* nothing to do ... */ +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + + /* + * Forget coprocessor state.. + */ + clear_fpu(tsk); + tsk->used_math = 0; +} + +void release_thread(struct task_struct *dead_task) +{ + if (dead_task->mm) { + void * ldt = dead_task->mm->context.segments; + + // temporary debugging check + if (ldt) { + printk("WARNING: dead process %8s still has LDT? <%p>\n", + dead_task->comm, ldt); + BUG(); + } + } +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +void copy_segments(struct task_struct *p, struct mm_struct *new_mm) +{ + struct mm_struct * old_mm; + void *old_ldt, *ldt; + + ldt = NULL; + old_mm = current->mm; + if (old_mm && (old_ldt = old_mm->context.segments) != NULL) { + /* + * Completely new LDT, we initialize it from the parent: + */ + ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if (!ldt) + printk(KERN_WARNING "ldt allocation failed\n"); + else + memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + } + new_mm->context.segments = ldt; + new_mm->context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */ +} + +/* + * Save a segment. + */ +#define savesegment(seg,value) \ + asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) + +int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, + unsigned long unused, + struct task_struct * p, struct pt_regs * regs) +{ + struct pt_regs * childregs; + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + struct_cpy(childregs, regs); + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + + p->thread.eip = (unsigned long) ret_from_fork; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); + + unlazy_fpu(current); + struct_cpy(&p->thread.i387, ¤t->thread.i387); + + return 0; +} + +/* + * fill in the user structure for a core dump.. + */ +void dump_thread(struct pt_regs * regs, struct user * dump) +{ + int i; + +/* changed the size calculations - should hopefully work better. 
lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + for (i = 0; i < 8; i++) + dump->u_debugreg[i] = current->thread.debugreg[i]; + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; + + dump->regs.ebx = regs->ebx; + dump->regs.ecx = regs->ecx; + dump->regs.edx = regs->edx; + dump->regs.esi = regs->esi; + dump->regs.edi = regs->edi; + dump->regs.ebp = regs->ebp; + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; + savesegment(fs,dump->regs.fs); + savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; + dump->regs.eflags = regs->eflags; + dump->regs.esp = regs->esp; + dump->regs.ss = regs->xss; + + dump->u_fpvalid = dump_fpu (regs, &dump->i387); +} + +/* + * switch_to(x,yn) should switch tasks from x to y. + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. Lazy FP saving no longer makes any sense + * with modern CPU's, and this simplifies a lot of things (SMP + * and UP become the same). + * + * NOTE! We used to use the x86 hardware context switching. The + * reason for not using it any more becomes apparent when you + * try to recover gracefully from saved state that is no longer + * valid (stale segment register values in particular). With the + * hardware task-switch, there is no way to fix up bad state in + * a reasonable manner. + * + * The fact that Intel documents the hardware task-switching to + * be slow is a fairly red herring - this code is not noticeably + * faster. However, there _is_ some room for improvement here, + * so the performance issues may eventually be a valid point. + * More important, however, is the fact that this allows us much + * more flexibility. + */ +void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; + + unlazy_fpu(prev_p); + + HYPERVISOR_stack_and_ldt_switch(__KERNEL_DS, next->esp0, 0); + + /* + * Save away %fs and %gs. No need to save %es and %ds, as + * those are always kernel segments while inside the kernel. + */ + asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); + asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); + + /* + * Restore %fs and %gs. 
+ */ + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + + /* + * Now maybe reload the debug registers + */ + if ( next->debugreg[7] != 0 ) + { + HYPERVISOR_set_debugreg(0, next->debugreg[0]); + HYPERVISOR_set_debugreg(1, next->debugreg[1]); + HYPERVISOR_set_debugreg(2, next->debugreg[2]); + HYPERVISOR_set_debugreg(3, next->debugreg[3]); + /* no 4 and 5 */ + HYPERVISOR_set_debugreg(6, next->debugreg[6]); + HYPERVISOR_set_debugreg(7, next->debugreg[7]); + } +} + +asmlinkage int sys_fork(struct pt_regs regs) +{ + return do_fork(SIGCHLD, regs.esp, ®s, 0); +} + +asmlinkage int sys_clone(struct pt_regs regs) +{ + unsigned long clone_flags; + unsigned long newsp; + + clone_flags = regs.ebx; + newsp = regs.ecx; + if (!newsp) + newsp = regs.esp; + return do_fork(clone_flags, newsp, ®s, 0); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +asmlinkage int sys_vfork(struct pt_regs regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); +} + +/* + * sys_execve() executes a new program. + */ +asmlinkage int sys_execve(struct pt_regs regs) +{ + int error; + char * filename; + + filename = getname((char *) regs.ebx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + out: + return error; +} + +/* + * These bracket the sleeping functions.. + */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ebp, esp, eip; + unsigned long stack_page; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack_page = (unsigned long)p; + esp = p->thread.esp; + if (!stack_page || esp < stack_page || esp > 8188+stack_page) + return 0; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + ebp = *(unsigned long *) esp; + do { + if (ebp < stack_page || ebp > 8184+stack_page) + return 0; + eip = *(unsigned long *) (ebp+4); + if (eip < first_sched || eip >= last_sched) + return eip; + ebp = *(unsigned long *) ebp; + } while (count++ < 16); + return 0; +} +#undef last_sched +#undef first_sched diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ptrace.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ptrace.c new file mode 100644 index 0000000000..b0ba97d198 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ptrace.c @@ -0,0 +1,454 @@ +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +/* set's the trap flag. 
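sys_clone() above pulls its arguments straight out of the trapped register frame (ebx is the clone flags, ecx the new stack pointer, and a zero stack means "reuse the caller's esp"). A raw clone syscall with a zero stack therefore behaves like fork(); the sketch below assumes the usual i386 argument order for SYS_clone and is illustrative only.

/* Illustrative only: raw clone with newsp == 0 acts like fork(). */
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    long pid = syscall(SYS_clone, SIGCHLD, 0);   /* flags = SIGCHLD, newsp = 0 */
    if (pid == 0) {
        printf("child running on the parent's copied stack\n");
        _exit(0);
    }
    waitpid(pid, NULL, 0);
    return 0;
}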
*/ +#define TRAP_FLAG 0x100 + +/* + * Offset of eflags on child stack.. + */ +#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs)) + +/* + * this routine will get a word off of the processes privileged stack. + * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int get_stack_long(struct task_struct *task, int offset) +{ + unsigned char *stack; + + stack = (unsigned char *)task->thread.esp0; + stack += offset; + return (*((int *)stack)); +} + +/* + * this routine will put a word on the processes privileged stack. + * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int put_stack_long(struct task_struct *task, int offset, + unsigned long data) +{ + unsigned char * stack; + + stack = (unsigned char *) task->thread.esp0; + stack += offset; + *(unsigned long *) stack = data; + return 0; +} + +static int putreg(struct task_struct *child, + unsigned long regno, unsigned long value) +{ + switch (regno >> 2) { + case FS: + if (value && (value & 3) != 3) + return -EIO; + child->thread.fs = value; + return 0; + case GS: + if (value && (value & 3) != 3) + return -EIO; + child->thread.gs = value; + return 0; + case DS: + case ES: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case SS: + case CS: + if ((value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case EFL: + value &= FLAG_MASK; + value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; + break; + } + if (regno > GS*4) + regno -= 2*4; + put_stack_long(child, regno - sizeof(struct pt_regs), value); + return 0; +} + +static unsigned long getreg(struct task_struct *child, + unsigned long regno) +{ + unsigned long retval = ~0UL; + + switch (regno >> 2) { + case FS: + retval = child->thread.fs; + break; + case GS: + retval = child->thread.gs; + break; + case DS: + case ES: + case SS: + case CS: + retval = 0xffff; + /* fall through */ + default: + if (regno > GS*4) + regno -= 2*4; + regno = regno - sizeof(struct pt_regs); + retval &= get_stack_long(child, regno); + } + return retval; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + long tmp; + + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); +} + +asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + struct user * dummy = NULL; + int i, ret; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long tmp; + + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; + + tmp = 0; /* Default return condition */ + if(addr < FRAME_SIZE*sizeof(long)) + tmp = getreg(child, addr); + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + addr -= (long) &dummy->u_debugreg[0]; + addr = addr >> 2; + tmp = child->thread.debugreg[addr]; + } + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) + break; + ret = -EIO; + break; + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; + + if (addr < FRAME_SIZE*sizeof(long)) { + ret = putreg(child, addr, data); + break; + } + /* We need to be very careful here. We implicitly + want to modify a portion of the task_struct, and we + have to be selective about what portions we allow someone + to modify. */ + + ret = -EIO; + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + + if(addr == (long) &dummy->u_debugreg[4]) break; + if(addr == (long) &dummy->u_debugreg[5]) break; + if(addr < (long) &dummy->u_debugreg[4] && + ((unsigned long) data) >= TASK_SIZE-3) break; + + if(addr == (long) &dummy->u_debugreg[7]) { + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + goto out_tsk; + } + + addr -= (long) &dummy->u_debugreg; + addr = addr >> 2; + child->thread.debugreg[addr] = data; + ret = 0; + } + break; + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + long tmp; + + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + child->ptrace |= PT_TRACESYS; + else + child->ptrace &= ~PT_TRACESYS; + child->exit_code = data; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET,tmp); + wake_up_process(child); + ret = 0; + break; + } + +/* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + long tmp; + + ret = 0; + if (child->state == TASK_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. */ + long tmp; + + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + child->ptrace &= ~PT_TRACESYS; + if ((child->ptrace & PT_DTRACE) == 0) { + /* Spurious delayed TF traps may occur */ + child->ptrace |= PT_DTRACE; + } + tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + child->exit_code = data; + /* give it a chance to run. 
*/ + wake_up_process(child); + ret = 0; + break; + } + + case PTRACE_DETACH: + /* detach a process that was attached. */ + ret = ptrace_detach(child, data); + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE*sizeof(long))) { + ret = -EIO; + break; + } + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __put_user(getreg(child, i),(unsigned long *) data); + data += sizeof(long); + } + ret = 0; + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) { + ret = -EIO; + break; + } + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __get_user(tmp, (unsigned long *) data); + putreg(child, i, tmp); + data += sizeof(long); + } + ret = 0; + break; + } + + case PTRACE_GETFPREGS: { /* Get the child FPU state. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + ret = 0; + if ( !child->used_math ) + load_empty_fpu(child); + get_fpregs((struct user_i387_struct *)data, child); + break; + } + + case PTRACE_SETFPREGS: { /* Set the child FPU state. */ + if (!access_ok(VERIFY_READ, (unsigned *)data, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + child->used_math = 1; + set_fpregs(child, (struct user_i387_struct *)data); + ret = 0; + break; + } + + case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + if ( !child->used_math ) + load_empty_fpu(child); + ret = get_fpxregs((struct user_fxsr_struct *)data, child); + break; + } + + case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ + if (!access_ok(VERIFY_READ, (unsigned *)data, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + child->used_math = 1; + ret = set_fpxregs(child, (struct user_fxsr_struct *)data); + break; + } + + case PTRACE_SETOPTIONS: { + if (data & PTRACE_O_TRACESYSGOOD) + child->ptrace |= PT_TRACESYSGOOD; + else + child->ptrace &= ~PT_TRACESYSGOOD; + ret = 0; + break; + } + + default: + ret = -EIO; + break; + } +out_tsk: + free_task_struct(child); +out: + unlock_kernel(); + return ret; +} + +asmlinkage void syscall_trace(void) +{ + if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) != + (PT_PTRACED|PT_TRACESYS)) + return; + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0); + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/semaphore.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/semaphore.c new file mode 100644 index 0000000000..fd44981804 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/semaphore.c @@ -0,0 +1,291 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. 
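The request handlers above (PTRACE_TRACEME, PTRACE_PEEKDATA, PTRACE_DETACH and friends) are driven from user space roughly as follows; error handling is trimmed and the program is only a sketch.

/* Illustrative only: trace a forked child and peek one word of its memory. */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

static long secret = 0x12345678;              /* same address in parent and child */

int main(void)
{
    pid_t pid = fork();
    if (pid == 0) {                            /* child: ask to be traced, then stop */
        ptrace(PTRACE_TRACEME, 0, NULL, NULL);
        raise(SIGSTOP);
        _exit(0);
    }
    waitpid(pid, NULL, 0);                     /* wait for the SIGSTOP */
    long word = ptrace(PTRACE_PEEKDATA, pid, &secret, NULL);
    printf("peeked 0x%lx\n", word);            /* expect 0x12345678 */
    ptrace(PTRACE_DETACH, pid, NULL, 0);       /* let the child run to _exit() */
    waitpid(pid, NULL, 0);
    return 0;
}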
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise + */ +#include +#include +#include + +/* + * Semaphores are implemented using a two-way counter: + * The "count" variable is decremented for each process + * that tries to acquire the semaphore, while the "sleeping" + * variable is a count of such acquires. + * + * Notably, the inline "up()" and "down()" functions can + * efficiently test if they need to do any extra work (up + * needs to do something only if count was negative before + * the increment operation. + * + * "sleeping" and the contention routine ordering is + * protected by the semaphore spinlock. + * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in + * where we want to avoid any extra jumps and calls. + */ + +/* + * Logic: + * - only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - when we go from a non-negative count to a negative do we + * (a) synchronize with the "sleeper" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void __up(struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED; + +void __down(struct semaphore * sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. 
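+ * For illustration: with one holder and one sleeper, count is -1
+ * and sleepers is 1, so adding sleepers-1 == 0 leaves count
+ * negative and we keep sleeping; once up() brings count back to 0
+ * the same add yields a non-negative result and we own the
+ * semaphore.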
+ */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for + * having decremented the count. + * + * We could have done the trylock with a + * single "cmpxchg" without failure cases, + * but then it wouldn't work on a 386. + */ +int __down_trylock(struct semaphore * sem) +{ + int sleepers; + unsigned long flags; + + spin_lock_irqsave(&semaphore_lock, flags); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irqrestore(&semaphore_lock, flags); + return 1; +} + + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %ecx contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax when used as a return + * value.. + */ +asm( +".text\n" +".align 4\n" +".globl __down_failed\n" +"__down_failed:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %eax\n\t" + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" + "popl %eax\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __down_failed_interruptible\n" +"__down_failed_interruptible:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down_interruptible\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __down_failed_trylock\n" +"__down_failed_trylock:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down_trylock\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __up_wakeup\n" +"__up_wakeup:\n\t" + "pushl %eax\n\t" + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __up\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" + "popl %eax\n\t" + "ret" +); + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +".text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c 
b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c new file mode 100644 index 0000000000..1650d7028f --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c @@ -0,0 +1,995 @@ +/* + * linux/arch/i386/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_RAM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +shared_info_t *HYPERVISOR_shared_info; + +unsigned long *phys_to_machine_mapping; + +/* + * Machine setup.. + */ + +char ignore_irq13; /* set if exception 16 works */ +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + +unsigned long mmu_cr4_features; + +/* + * Bus types .. + */ +#ifdef CONFIG_EISA +int EISA_bus; +#endif +int MCA_bus; + +/* for MCA, but anyone else can use it if they want */ +unsigned int machine_id; +unsigned int machine_submodel_id; +unsigned int BIOS_revision; +unsigned int mca_pentium_flag; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; + +/* + * Setup options + */ +struct drive_info_struct { char dummy[32]; } drive_info; +struct screen_info screen_info; +struct apm_info apm_info; +struct sys_desc_table_struct { + unsigned short length; + unsigned char table[0]; +}; + +unsigned char aux_device_present; + +extern int root_mountflags; +extern char _text, _etext, _edata, _end; + +int enable_acpi_smp_table; + +/* Raw start-of-day parameters from the hypervisor. */ +union start_info_union start_info_union; + +#define COMMAND_LINE_SIZE 256 +static char command_line[COMMAND_LINE_SIZE]; +char saved_command_line[COMMAND_LINE_SIZE]; + +static void __init parse_mem_cmdline (char ** cmdline_p) +{ + char c = ' ', *to = command_line, *from = saved_command_line; + int len = 0; + + /* Save unparsed command line copy for /proc/cmdline */ + memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + + for (;;) { + /* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. 
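+ * For example, "mem=128M" caps usable memory at 128MB, while
+ * "mem=96M@16M" describes a 96MB region starting at the 16MB
+ * boundary.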
+ */ + if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (to != command_line) + to--; + if (!memcmp(from+4, "nopentium", 9)) { + from += 9+4; + } else if (!memcmp(from+4, "exactmap", 8)) { + from += 8+4; + } else { + (void)memparse(from+4, &from); + if (*from == '@') + (void)memparse(from+1, &from); + } + } + + c = *(from++); + if (!c) + break; + if (COMMAND_LINE_SIZE <= ++len) + break; + *(to++) = c; + } + *to = '\0'; + *cmdline_p = command_line; +} + +void __init setup_arch(char **cmdline_p) +{ + unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long bootmap_size; + unsigned long i; + + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + + extern unsigned long cpu0_pte_quicklist[]; + extern unsigned long cpu0_pgd_quicklist[]; + + HYPERVISOR_shared_info->event_address = + (unsigned long)hypervisor_callback; + HYPERVISOR_shared_info->failsafe_address = + (unsigned long)failsafe_callback; + + boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; + boot_cpu_data.pte_quick = cpu0_pte_quicklist; + + ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); + memset(&drive_info, 0, sizeof(drive_info)); + memset(&screen_info, 0, sizeof(screen_info)); + memset(&apm_info.bios, 0, sizeof(apm_info.bios)); + aux_device_present = 0; +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = 0; + rd_prompt = 0; + rd_doload = 0; +#endif + + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + parse_mem_cmdline(cmdline_p); + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +/* + * 128MB for vmalloc and initrd + */ +#define VMALLOC_RESERVE (unsigned long)(128 << 20) +#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ +#ifdef CONFIG_BLK_DEV_INITRD + if ( start_info.mod_start ) + start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len)); + else +#endif + start_pfn = PFN_UP(__pa(&_end)); + max_pfn = start_info.nr_pages; + + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_X86_PAE + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used.\n"); + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + } +#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM */ + } + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > MAXMEM_PFN) { + highstart_pfn = MAXMEM_PFN; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + } +#endif + + /* + * Initialize the boot-time allocator, and free up all RAM. + * Then reserve space for OS image, and the bootmem bitmap. 
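+ * The order below matters: init_bootmem() places the allocator
+ * bitmap at start_pfn, free_bootmem() marks all of low memory as
+ * available, and reserve_bootmem() immediately takes back
+ * everything up to the end of the image plus the bitmap, before
+ * the hypervisor-provided page tables are reserved as well.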
+ */ + bootmap_size = init_bootmem(start_pfn, max_low_pfn); + free_bootmem(0, PFN_PHYS(max_low_pfn)); + reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1); + + /* Now reserve space for the hypervisor-provided page tables. */ + { + unsigned long *pgd = (unsigned long *)start_info.pt_base; + unsigned long pte; + int i; + reserve_bootmem(__pa(pgd), PAGE_SIZE); + for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ ) + { + unsigned long pgde = *pgd++; + if ( !(pgde & 1) ) continue; + pte = machine_to_phys(pgde & PAGE_MASK); + reserve_bootmem(pte, PAGE_SIZE); + } + } + cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base; + + /* Now initialise the physical->machine mapping table. */ + phys_to_machine_mapping = alloc_bootmem(max_pfn * sizeof(unsigned long)); + for ( i = 0; i < max_pfn; i++ ) + { + unsigned long pgde, *ppte; + unsigned long pfn = i + (PAGE_OFFSET >> PAGE_SHIFT); + pgde = *((unsigned long *)start_info.pt_base + (pfn >> 10)); + ppte = (unsigned long *)machine_to_phys(pgde & PAGE_MASK) + (pfn&1023); + phys_to_machine_mapping[i] = + (*(unsigned long *)__va(ppte)) >> PAGE_SHIFT; + } + +#ifdef CONFIG_BLK_DEV_INITRD + if (start_info.mod_start) { + if ((__pa(start_info.mod_start) + start_info.mod_len) <= + (max_low_pfn << PAGE_SHIFT)) { + initrd_start = start_info.mod_start; + initrd_end = initrd_start + start_info.mod_len; + initrd_below_start_ok = 1; + } + else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + __pa(start_info.mod_start) + start_info.mod_len, + max_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } + } +#endif + + paging_init(); +} + +static int cachesize_override __initdata = -1; +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + + +static int __init get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +static void __init display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* AMD errata T13 (order #21922) */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + l2size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + l2size = 256; + } + + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. 
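+ * (The cachesize= parameter is in kilobytes, so booting with
+ * "cachesize=512" selects the 512kb variant.)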
+ */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && + (c->x86_model == 11) && (l2size == 0)) + l2size = 256; + + /* VIA C3 CPUs (670-68F) need further shifting. */ + if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) && + ((c->x86_model == 7) || (c->x86_model == 8))) { + l2size = l2size >> 8; + } + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + + +static int __init init_amd(struct cpuinfo_x86 *c) +{ + int r; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 6: /* An Athlon/Duron. We can trust the BIOS probably */ + break; + default: + panic("Unsupported AMD processor\n"); + } + + display_cacheinfo(c); + return r; +} + + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + char *p = NULL; + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + + if (c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char dl, dh; + unsigned int cs; + + dh = des >> 4; + dl = des & 0x0F; + + /* Black magic... */ + + switch ( dh ) + { + case 0: + switch ( dl ) { + case 6: + /* L1 I cache */ + l1i += 8; + break; + case 8: + /* L1 I cache */ + l1i += 16; + break; + case 10: + /* L1 D cache */ + l1d += 8; + break; + case 12: + /* L1 D cache */ + l1d += 16; + break; + default:; + /* TLB, or unknown */ + } + break; + case 2: + if ( dl ) { + /* L3 cache */ + cs = (dl-1) << 9; + l3 += cs; + } + break; + case 4: + if ( c->x86 > 6 && dl ) { + /* P4 family */ + /* L3 cache */ + cs = 128 << (dl-1); + l3 += cs; + break; + } + /* else same as 8 - fall through */ + case 8: + if ( dl ) { + /* L2 cache */ + cs = 128 << (dl-1); + l2 += cs; + } + break; + case 6: + if (dl > 5) { + /* L1 D cache */ + cs = 8<<(dl-6); + l1d += cs; + } + break; + case 7: + if ( dl >= 8 ) + { + /* L2 cache */ + cs = 64<<(dl-8); + l2 += cs; + } else { + /* L0 I cache, count as L1 */ + cs = dl ? (16 << (dl-1)) : 12; + l1i += cs; + } + break; + default: + /* TLB, or something else we don't know about */ + break; + } + } + } + if ( l1i || l1d ) + printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", + l1i, l1d); + if ( l2 ) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + if ( l3 ) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + /* + * This assumes the L3 cache is shared; it typically lives in + * the northbridge. The L1 caches are included by the L2 + * cache, and so should not be included for the purpose of + * SMP switching weights. + */ + c->x86_cache_size = l2 ? 
l2 : (l1i+l1d); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, &c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (l2 == 0) + p = "Celeron (Covington)"; + if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); +} + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* Naming convention should be: [()] */ +/* This table only is used unless init_() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ +static struct cpu_model_info cpu_models[] __initdata = { + { X86_VENDOR_INTEL, 6, + { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", + NULL, "Pentium II (Deschutes)", "Mobile Pentium II", + "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, + "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, + { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ + { "Athlon", "Athlon", + "Athlon", NULL, "Athlon", NULL, + NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL }} +}; + +/* Look up CPU names by table lookup. */ +static char __init *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info = cpu_models; + int i; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { + if ( info->vendor == c->x86_vendor && + info->family == c->x86 ) { + return info->model_names[c->x86_model]; + } + info++; + } + return NULL; /* Not found */ +} + + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + + + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int junk, i; + u32 xlvl, tfms; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->hard_math = 1; + + if ( !have_cpuid_p() ) { + panic("Processor must support CPUID\n"); + } else { + /* CPU does have CPUID */ + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + cpuid(0x00000001, &tfms, &junk, &junk, + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + } + + printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + switch ( c->x86_vendor ) { + case X86_VENDOR_AMD: + init_amd(c); + break; + + case X86_VENDOR_INTEL: + init_intel(c); + break; + + default: + panic("Unsupported CPU vendor\n"); + } + + printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86_vendor, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. 
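+ * Secondary CPUs therefore AND their capability words into
+ * boot_cpu_data below, so the reported common capability set only
+ * ever shrinks to the intersection of all CPUs seen so far.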
+ */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", + boot_cpu_data.x86_capability[0], + boot_cpu_data.x86_capability[1], + boot_cpu_data.x86_capability[2], + boot_cpu_data.x86_capability[3]); +} + + +/* These need to match */ +static char *cpu_vendor_names[] __initdata = { + "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; + + +void __init print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) + vendor = cpu_vendor_names[c->x86_vendor]; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ + static char *x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL, + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!(cpu_online_map & (1<x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, &c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, +}; + +unsigned long cpu_initialized __initdata = 0; + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __init cpu_init (void) +{ + int nr = smp_processor_id(); + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + for (;;) __sti(); + } + printk(KERN_INFO "Initializing CPU#%d\n", nr); + + /* + * set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if(current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current, nr); + + HYPERVISOR_stack_and_ldt_switch(__KERNEL_DS, current->thread.esp0, 0); + + /* Force FPU initialization. */ + current->flags &= ~PF_USEDFPU; + current->used_math = 0; + stts(); +} + + +/****************************************************************************** + * Time-to-die callback handling. 
+ */ + +static void time_to_die(int irq, void *unused, struct pt_regs *regs) +{ + extern void ctrl_alt_del(void); + ctrl_alt_del(); +} + +static int __init setup_death_event(void) +{ + (void)request_irq(_EVENT_DIE, time_to_die, 0, "die", NULL); + return 0; +} + +__initcall(setup_death_event); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/signal.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/signal.c new file mode 100644 index 0000000000..f646c5c0ca --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/signal.c @@ -0,0 +1,717 @@ +/* + * linux/arch/i386/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. */ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + /* case __SI_RT: This is not generated by the kernel as of now. */ + } + return err; + } +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +asmlinkage int +sys_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + struct pt_regs * regs = (struct pt_regs *) &history0; + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +{ + struct pt_regs * regs = (struct pt_regs *) &unewset; + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage int +sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + struct pt_regs *regs = (struct pt_regs *) &uss; + return do_sigaltstack(uss, uoss, regs->esp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + char *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char *pretcode; + int sig; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) +{ + unsigned int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + +#define COPY_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp; } + +#define COPY_SEG_STRICT(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp|3; } + +#define GET_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg,tmp); } + + GET_SEG(gs); + GET_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(edi); + COPY(esi); + COPY(ebp); + COPY(esp); + COPY(ebx); + COPY(edx); + COPY(ecx); + COPY(eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } + + err |= __get_user(*peax, &sc->eax); + return err; + +badframe: + return 1; +} + +asmlinkage int sys_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); + sigset_t set; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && 
__copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); + sigset_t set; + stack_t st; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack(&st, NULL, regs->esp); + + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. + */ + +static int +setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int tmp, err = 0; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->gs); + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->fs); + + err |= __put_user(regs->xes, (unsigned int *)&sc->es); + err |= __put_user(regs->xds, (unsigned int *)&sc->ds); + err |= __put_user(regs->edi, &sc->edi); + err |= __put_user(regs->esi, &sc->esi); + err |= __put_user(regs->ebp, &sc->ebp); + err |= __put_user(regs->esp, &sc->esp); + err |= __put_user(regs->ebx, &sc->ebx); + err |= __put_user(regs->edx, &sc->edx); + err |= __put_user(regs->ecx, &sc->ecx); + err |= __put_user(regs->eax, &sc->eax); + err |= __put_user(current->thread.trap_no, &sc->trapno); + err |= __put_user(current->thread.error_code, &sc->err); + err |= __put_user(regs->eip, &sc->eip); + err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); + err |= __put_user(regs->eflags, &sc->eflags); + err |= __put_user(regs->esp, &sc->esp_at_signal); + err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. */ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->thread.cr2, &sc->cr2); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + + /* Default to using normal stack */ + esp = regs->esp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(esp) == 0) + esp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. 
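+ * That is: if the process is not on the normal user %ss, did not
+ * set SA_RESTORER, yet supplied a non-NULL sa_restorer, that value
+ * is treated as the alternate stack pointer (a hold-over from
+ * older ABIs).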
*/ + else if ((regs->xss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + esp = (unsigned long) ka->sa.sa_restorer; + } + + return (void *)((esp - frame_size) & -8ul); +} + +static void setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + if (err) + goto give_sigsegv; + + err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + err |= __put_user(0xb858, (short *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short *)(frame->retcode+6)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto give_sigsegv; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->esp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
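+ * Otherwise a tiny trampoline is written into frame->retcode:
+ * 0xb8 is the opcode for "movl $imm32,%eax" (loading the
+ * rt_sigreturn syscall number) and 0xcd 0x80 is "int $0x80", so
+ * returning from the handler drops straight into sys_rt_sigreturn().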
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + err |= __put_user(0xb8, (char *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short *)(frame->retcode+5)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +/* + * OK, we're invoking a handler + */ + +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +{ + /* Are we from a system call? */ + if (regs->orig_eax >= 0) { + /* If so, check system call restarting.. */ + switch (regs->eax) { + case -ERESTARTNOHAND: + regs->eax = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->eax = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else + setup_frame(sig, ka, oldset, regs); + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset) +{ + siginfo_t info; + struct k_sigaction *ka; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if ((regs->xcs & 2) != 2) + return 1; + + if (!oldset) + oldset = ¤t->blocked; + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. 
*/ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: { + struct signal_struct *sig; + current->state = TASK_STOPPED; + current->exit_code = signr; + sig = current->p_pptr->sig; + if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + } + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sig_exit(signr, exit_code, &info); + /* NOTREACHED */ + } + } + + /* Reenable any watchpoints before delivering the + * signal to user space. The processor register will + * have been cleared if the watchpoint triggered + * inside the kernel. + */ + if ( current->thread.debugreg[7] != 0 ) + HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, ka, &info, oldset, regs); + return 1; + } + + /* Did we come from a system call? */ + if (regs->orig_eax >= 0) { + /* Restart the system call - no handlers present */ + if (regs->eax == -ERESTARTNOHAND || + regs->eax == -ERESTARTSYS || + regs->eax == -ERESTARTNOINTR) { + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + return 0; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/sys_i386.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/sys_i386.c new file mode 100644 index 0000000000..5fd6910b9c --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/sys_i386.c @@ -0,0 +1,256 @@ +/* + * linux/arch/i386/kernel/sys_i386.c + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/i386 + * platform. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. 
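+ * Traditional Unix returns the two descriptors in two registers;
+ * here do_pipe() fills a local array which is then copied out to
+ * the caller-supplied fildes pointer.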
+ */ +asmlinkage int sys_pipe(unsigned long * fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, 2*sizeof(int))) + error = -EFAULT; + } + return error; +} + +/* common code for old and new mmaps */ +static inline long do_mmap2( + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + int error = -EBADF; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down_write(¤t->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + + if (file) + fput(file); +out: + return error; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + return do_mmap2(addr, len, prot, flags, fd, pgoff); +} + +/* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +asmlinkage int old_mmap(struct mmap_arg_struct *arg) +{ + struct mmap_arg_struct a; + int err = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + + err = -EINVAL; + if (a.offset & ~PAGE_MASK) + goto out; + + err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); +out: + return err; +} + + +extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); + +struct sel_arg_struct { + unsigned long n; + fd_set *inp, *outp, *exp; + struct timeval *tvp; +}; + +asmlinkage int old_select(struct sel_arg_struct *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + /* sys_select() does the appropriate kernel locking */ + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. 
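+ * For example, a C library typically implements semget(key, n, flg)
+ * as sys_ipc(SEMGET, key, n, flg, NULL, 0), and shmat() goes via
+ * the SHMAT case below, which writes the attach address back
+ * through the "third" argument.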
+ */ +asmlinkage int sys_ipc (uint call, int first, int second, + int third, void *ptr, long fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semop (first, (struct sembuf *)ptr, second); + case SEMGET: + return sys_semget (first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void **) ptr)) + return -EFAULT; + return sys_semctl (first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd (first, (struct msgbuf *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + + if (copy_from_user(&tmp, + (struct ipc_kludge *) ptr, + sizeof (tmp))) + return -EFAULT; + return sys_msgrcv (first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + return sys_msgrcv (first, + (struct msgbuf *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget ((key_t) first, second); + case MSGCTL: + return sys_msgctl (first, second, (struct msqid_ds *) ptr); + + case SHMAT: + switch (version) { + default: { + ulong raddr; + ret = sys_shmat (first, (char *) ptr, second, &raddr); + if (ret) + return ret; + return put_user (raddr, (ulong *) third); + } + case 1: /* iBCS2 emulator entry point */ + if (!segment_eq(get_fs(), get_ds())) + return -EINVAL; + return sys_shmat (first, (char *) ptr, second, (ulong *) third); + } + case SHMDT: + return sys_shmdt ((char *)ptr); + case SHMGET: + return sys_shmget (first, second, third); + case SHMCTL: + return sys_shmctl (first, second, + (struct shmid_ds *) ptr); + default: + return -EINVAL; + } +} + +/* + * Old cruft + */ +asmlinkage int sys_uname(struct old_utsname * name) +{ + int err; + if (!name) + return -EFAULT; + down_read(&uts_sem); + err=copy_to_user(name, &system_utsname, sizeof (*name)); + up_read(&uts_sem); + return err?-EFAULT:0; +} + +asmlinkage int sys_olduname(struct oldold_utsname * name) +{ + int error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + error |= __put_user(0,name->sysname+__OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __put_user(0,name->nodename+__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __put_user(0,name->release+__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __put_user(0,name->version+__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __put_user(0,name->machine+__OLD_UTS_LEN); + + up_read(&uts_sem); + + error = error ? 
-EFAULT : 0; + + return error; +} + +asmlinkage int sys_pause(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/time.c new file mode 100644 index 0000000000..55b100e257 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/time.c @@ -0,0 +1,348 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: arch.xeno/time.c + * Author: Rolf Neugebauer + * Changes: + * + * Date: Nov 2002 + * + * Environment: XenoLinux + * Description: Interface with Hypervisor to get correct notion of time + * Currently supports Systemtime and WallClock time. + * + * (This has hardly any resemblence with the Linux code but left the + * copyright notice anyway. Ignore the comments in the copyright notice.) + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * This file contains the PC-specific time handling details: + * reading the RTC at bootup, etc.. + * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1995-03-26 Markus Kuhn + * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 + * precision CMOS clock update + * 1996-05-03 Ingo Molnar + * fixed time warps in do_[slow|fast]_gettimeoffset() + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-09-05 (Various) + * More robust do_fast_gettimeoffset() algorithm implemented + * (works with APM, Cyrix 6x86MX and Centaur C6), + * monotonic gettimeofday() with fast_get_timeoffset(), + * drift-proof precision TSC calibration on boot + * (C. Scott Ananian , Andrew D. + * Balsa , Philip Gladstone ; + * ported from 2.0.35 Jumbo-9 by Michael Krause ). + * 1998-12-16 Andrea Arcangeli + * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy + * because was not accounting lost_ticks. + * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli + * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */ + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t xtime_lock; + +unsigned long cpu_khz; /* get this from Xen, used elsewhere */ +static spinlock_t hyp_stime_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t hyp_wctime_lock = SPIN_LOCK_UNLOCKED; + +static u32 st_scale_f; +static u32 st_scale_i; +static u32 shadow_st_pcc; +static s64 shadow_st; + +/* + * System time. + * Although the rest of the Linux kernel doesn't know about this, we + * we use it to extrapolate passage of wallclock time. + * We need to read the values from the shared info page "atomically" + * and use the cycle counter value as the "version" number. Clashes + * should be very rare. 
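+ * Roughly: now = shadow_st + (tsc_now - st_timestamp) * ns_per_cycle,
+ * where ns_per_cycle is kept as a 32.32 fixed-point pair
+ * (st_scale_i.st_scale_f) so the scaling needs only two 32x32
+ * multiplies and no 64-bit divide on the fast path.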
+ */ +static inline long long get_s_time(void) +{ + unsigned long flags; + u32 delta_tsc, low, pcc; + u64 delta; + s64 now; + + spin_lock_irqsave(&hyp_stime_lock, flags); + + while ((pcc = HYPERVISOR_shared_info->st_timestamp) != shadow_st_pcc) + { + barrier(); + shadow_st_pcc = pcc; + shadow_st = HYPERVISOR_shared_info->system_time; + barrier(); + } + + now = shadow_st; + /* only use bottom 32 bits of TSC. This should be sufficient */ + rdtscl(low); + delta_tsc = low - pcc; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + spin_unlock_irqrestore(&hyp_stime_lock, flags); + + return now + delta; + +} +#define NOW() ((long long)get_s_time()) + +/* + * Wallclock time. + * Based on what the hypervisor tells us, extrapolated using system time. + * Again we need to read a number of values from the shared page "atomically", + * this time using a version number. + */ +static u32 shadow_wc_version=0; +static long shadow_tv_sec; +static long shadow_tv_usec; +static long long shadow_wc_timestamp; +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags; + long usec, sec; + u32 version; + u64 now; + + spin_lock_irqsave(&hyp_wctime_lock, flags); + + while ((version = HYPERVISOR_shared_info->wc_version) != shadow_wc_version) + { + barrier(); + shadow_wc_version = version; + shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; + shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; + shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; + barrier(); + } + + now = NOW(); + usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; + sec = shadow_tv_sec; + usec += shadow_tv_usec; + + while ( usec >= 1000000 ) + { + usec -= 1000000; + sec++; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; + + spin_unlock_irqrestore(&hyp_wctime_lock, flags); + +#ifdef XENO_TIME_DEBUG + { + static long long old_now=0; + static long long wct=0, old_wct=0; + + /* This debug code checks that time increases across two subsequent calls */ + wct=(((long long)sec) * 1000000) + usec; + /* wall clock time going backwards */ + if ((wct < old_wct) ) { + printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n", + (long)(wct-old_wct), usec, usec); + printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + + /* system time going backwards */ + if (now<=old_now) { + printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + old_wct = wct; + old_now = now; + } +#endif + +} + +void do_settimeofday(struct timeval *tv) +{ +/* XXX RN: should do something special here for dom0 */ +#if 0 + write_lock_irq(&xtime_lock); + /* + * This is revolting. We need to set "xtime" correctly. However, the + * value in this location is the value at the most recent update of + * wall time. Discover what correction gettimeofday() would have + * made, and then undo it! + */ + tv->tv_usec -= do_gettimeoffset(); + tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + + while ( tv->tv_usec < 0 ) + { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + write_unlock_irq(&xtime_lock); +#endif +} + + +/* + * Timer ISR. + * Unlike normal Linux, these don't come in at a fixed rate of HZ. + * In here we work out how often it should have been called and then call + * the architecture independent part (do_timer()) the appropriate number of + * times.
A bit of a nasty hack, to keep the "other" notion of wallclock time + * happy. + */ +static long long us_per_tick=1000000/HZ; +static long long last_irq; +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + struct timeval tv; + long long time, delta; + +#ifdef XENO_TIME_DEBUG + static u32 foo_count = 0; + foo_count++; + if (foo_count >= 1000) { + s64 n = NOW(); + struct timeval tv; + do_gettimeofday(&tv); + printk("0x%08X%08X %ld:%ld\n", + (u32)(n>>32), (u32)n, tv.tv_sec, tv.tv_usec); + foo_count = 0; + } +#endif + /* + * The next bit really sucks: + * Linux not only uses do_gettimeofday() to keep a notion of + * wallclock time, but also maintains the xtime struct and jiffies. + * (Even worse, some userland code accesses this via the sys_time() + * system call.) + * Unfortunately, xtime is maintained in the architecture independent + * part of the timer ISR (./kernel/timer.c sic!). So, although we have + * a perfectly valid notion of wallclock time from the hypervisor, we here + * fake missed timer interrupts so that the arch independent part of + * the Timer ISR updates jiffies for us *and*, once the bh gets run, + * updates xtime accordingly. Yuck! + */ + + /* work out the number of jiffies that have passed and update them */ + do_gettimeofday(&tv); + time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; + delta = time - last_irq; + if (delta <= 0) { + printk ("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + while (delta >= us_per_tick) { + do_timer(regs); + delta -= us_per_tick; + last_irq += us_per_tick; + } + +#if 0 + if (!user_mode(regs)) + x86_do_profile(regs->eip); +#endif +} + +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + write_lock(&xtime_lock); + do_timer_interrupt(irq, NULL, regs); + write_unlock(&xtime_lock); +} + +static struct irqaction irq_timer = { + timer_interrupt, + SA_INTERRUPT, + 0, + "timer", + NULL, + NULL +}; + +void __init time_init(void) +{ + unsigned long long alarm; + u64 cpu_freq = HYPERVISOR_shared_info->cpu_freq; + u64 scale; + + cpu_khz = (u32)cpu_freq/1000; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + /* + * calculate the system time scaling factor + * XXX RN: casting cpu_freq to u32 limits it to 4.29 GHz. + * Get a better do_div! + */ + scale = 1000000000LL << 32; + do_div(scale,(u32)cpu_freq); + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + printk("System Time scale: %X %X\n",st_scale_i, st_scale_f); + + do_gettimeofday(&xtime); + last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec; + + setup_irq(TIMER_IRQ, &irq_timer); + + /* + * Start ticker. Note that timing runs off wall clock, not virtual + * 'domain' time. This means that the clock should run at the correct + * rate. For things like scheduling, it's not clear whether it + * matters which sort of time we use. + * XXX RN: unimplemented.
+ */ + + rdtscll(alarm); +#if 0 + alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; + HYPERVISOR_shared_info->wall_timeout = alarm; + HYPERVISOR_shared_info->domain_timeout = ~0ULL; +#endif + clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c new file mode 100644 index 0000000000..da7cd7413e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c @@ -0,0 +1,565 @@ +/* + * linux/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +asmlinkage int system_call(void); + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 24; + + +/* + * If the address is either in the .text section of the + * kernel, or in the vmalloc'ed module regions, it *may* + * be the address of a calling routine + */ + +#ifdef CONFIG_MODULES + +extern struct module *module_list; +extern struct module kernel_module; + +static inline int kernel_text_address(unsigned long addr) +{ + int retval = 0; + struct module *mod; + + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + /* mod_bound tests for addr being inside the vmalloc'ed + * module area. Of course it'd be better to test only + * for the .text subset... */ + if (mod_bound(addr, 0, mod)) { + retval = 1; + break; + } + } + + return retval; +} + +#else + +static inline int kernel_text_address(unsigned long addr) +{ + return (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext); +} + +#endif + +void show_trace(unsigned long * stack) +{ + int i; + unsigned long addr; + + if (!stack) + stack = (unsigned long*)&stack; + + printk("Call Trace: "); + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_trace_task(struct task_struct *tsk) +{ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ + if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) + return; + show_trace((unsigned long *)esp); +} + +void show_stack(unsigned long * esp) +{ + unsigned long *stack; + int i; + + // debugging aid: "show_stack(NULL);" prints the + // back trace for this cpu. 
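+ // Taking the address of our own 'esp' argument yields a pointer into
+ // the current stack frame, which is why passing NULL works here.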
+ + if(esp==NULL) + esp=(unsigned long*)&esp; + + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + show_trace(esp); +} + +void show_registers(struct pt_regs *regs) +{ + int i; + int in_kernel = 1; + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (®s->esp); + ss = __KERNEL_DS; + if (regs->xcs & 2) { + in_kernel = 0; + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk("Process %s (pid: %d, stackpage=%08lx)", + current->comm, current->pid, 4096+(unsigned long)current); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ + if (in_kernel) { + + printk("\nStack: "); + show_stack((unsigned long*)esp); + +#if 0 + printk("\nCode: "); + if(regs->eip < PAGE_OFFSET) + goto bad; + + for(i=0;i<20;i++) + { + unsigned char c; + if(__get_user(c, &((unsigned char*)regs->eip)[i])) { +bad: + printk(" Bad EIP value."); + break; + } + printk("%02x ", c); + } +#endif + } + printk("\n"); +} + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04lx\n", str, err & 0xffff); + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +{ + if (!(2 & regs->xcs)) + die(str, regs, err); +} + + +static void inline do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) +{ + if (!(regs->xcs & 2)) + goto kernel_trap; + + /*trap_signal:*/ { + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; + } + + kernel_trap: { + unsigned long fixup = search_exception_table(regs->eip); + if (fixup) + regs->eip = fixup; + else + die(str, regs, error_code); + return; + } +} + +#define DO_ERROR(trapnr, signr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ +} + +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void *)siaddr; \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ +} + +DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, SIGTRAP, "int3", int3) +DO_ERROR( 4, SIGSEGV, "overflow", overflow) +DO_ERROR( 5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) +DO_ERROR( 8, SIGSEGV, "double fault", double_fault) +DO_ERROR( 9, SIGFPE, "coprocessor segment 
overrun", coprocessor_segment_overrun) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, SIGBUS, "machine check", machine_check) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + if (!(regs->xcs & 2)) + goto gp_in_kernel; + + current->thread.error_code = error_code; + current->thread.trap_no = 13; + force_sig(SIGSEGV, current); + return; + +gp_in_kernel: + { + unsigned long fixup; + fixup = search_exception_table(regs->eip); + if (fixup) { + regs->eip = fixup; + return; + } + die("general protection fault", regs, error_code); + } +} + + +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + siginfo_t info; + + condition = HYPERVISOR_get_debugreg(6); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg[7]) + goto clear_dr7; + } + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg[6] = condition; + + /* Mask out spurious TF errors due to lazy TF clearing */ + if (condition & DR_STEP) { + /* + * The TF error should be masked out only if the current + * process is not traced and if the TRAP flag has been set + * previously by a tracing process (condition detected by + * the PT_DTRACE flag); remember that the i386 TRAP flag + * can be modified by the process itself in user mode, + * allowing programs to debug themselves without the ptrace() + * interface. + */ + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } + + /* Ok, finally something we can handle */ + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ + info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : + (void *)regs->eip; + force_sig_info(SIGTRAP, &info, tsk); + + /* Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ + clear_dr7: + HYPERVISOR_set_debugreg(7, 0); + return; + + clear_TF: + regs->eflags &= ~TF_MASK; + return; +} + + +/* + * Note that we play around with the 'TS' bit in an attempt to get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short cwd, swd; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 16; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. 
We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't syncronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); + switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + case 0x040: /* Stack Fault */ + case 0x240: /* Stack Fault | Direction */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + ignore_irq13 = 1; + math_error((void *)regs->eip); +} + +void simd_math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short mxcsr; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 19; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. + */ + mxcsr = get_fpu_mxcsr(task); + switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, + long error_code) +{ + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_irq13 = 1; + simd_math_error((void *)regs->eip); + } else { + die_if_kernel("cache flush denied", regs, error_code); + current->thread.trap_no = 19; + current->thread.error_code = error_code; + force_sig(SIGSEGV, current); + } +} + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ +} + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. 
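+ *
+ * restore_fpu() reloads the saved FPU image if the task has used the FPU
+ * before, otherwise init_fpu() hands it a clean state; PF_USEDFPU then
+ * marks the state as live so it is saved again at the next switch_to().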
+ */ +asmlinkage void math_state_restore(struct pt_regs regs) +{ + if (current->used_math) { + restore_fpu(current); + } else { + init_fpu(); + } + current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ +} + +static trap_info_t trap_table[] = { + { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, __KERNEL_CS, (unsigned long)debug }, + { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3, __KERNEL_CS, (unsigned long)overflow }, + { 5, 3, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, + { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, + { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, + { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { SYSCALL_VECTOR, + 3, __KERNEL_CS, (unsigned long)system_call }, + { 0, 0, 0, 0 } +}; + + + +void __init trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); + HYPERVISOR_set_fast_trap(SYSCALL_VECTOR); + cpu_init(); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/Makefile new file mode 100644 index 0000000000..2224f0312c --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -c $< -o $*.o + +L_TARGET = lib.a + +obj-y = checksum.o old-checksum.o delay.o \ + usercopy.o getuser.o \ + memcpy.o strstr.o + +obj-$(CONFIG_X86_USE_3DNOW) += mmx.o +obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/checksum.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/checksum.S new file mode 100644 index 0000000000..94c7867ddc --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/checksum.S @@ -0,0 +1,496 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, + * Arnt Gulbrandsen, + * Tom May, + * Pentium Pro/II routines: + * Alexander Kjeldaas + * Finn Arne Gangstad + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +/* + * computes a partial checksum, e.g. 
for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $3, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + testl $1, %esi # Check alignment. + jz 10f # Jump if alignment is boundary of 2bytes. + + # buf is odd + dec %ecx + jl 8f + movzbl (%esi), %ebx + adcl %ebx, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 2f +10: + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + testl $1, 12(%esp) + jz 8f + roll $8, %eax +8: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $3, %esi + jnz 25f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b +25: + testl $1, %esi + jz 30f + # buf is odd + dec %ecx + jl 90f + movzbl (%esi), %ebx + addl %ebx, %eax + adcl $0, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 10b + +30: subl $2, %ecx + ja 20b + je 32f + addl $2, %ecx + jz 80f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl 
-44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + testl $1, 12(%esp) + jz 90f + roll $8, %eax +90: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 +.globl csum_partial_copy_generic + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. 
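	# Fewer than two bytes in total; handle them at label 4 below.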
+ jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, 
(%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/dec_and_lock.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/dec_and_lock.c new file mode 100644 index 0000000000..ffd4869001 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/dec_and_lock.c @@ -0,0 +1,40 @@ +/* + * x86 version of "atomic_dec_and_lock()" using + * the atomic "cmpxchg" instruction. + * + * (For CPU's lacking cmpxchg, we use the slow + * generic version, and this one never even gets + * compiled). + */ + +#include +#include + +int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + +repeat: + counter = atomic_read(atomic); + newcount = counter-1; + + if (!newcount) + goto slow_path; + + asm volatile("lock; cmpxchgl %1,%2" + :"=a" (newcount) + :"r" (newcount), "m" (atomic->counter), "0" (counter)); + + /* If the above failed, "eax" will have changed */ + if (newcount != counter) + goto repeat; + return 0; + +slow_path: + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/delay.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/delay.c new file mode 100644 index 0000000000..47b8ac71a9 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/delay.c @@ -0,0 +1,47 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +#include +#endif + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); + __delay(xloops * HZ); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/getuser.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/getuser.S new file mode 100644 index 0000000000..c244721e70 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/getuser.S @@ -0,0 +1,73 @@ +/* + * __get_user functions. + * + * (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ + +/* + * __get_user_X + * + * Inputs: %eax contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * %edx contains zero-extended value + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. 
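+ *
+ * The limit check below finds the current task_struct by masking %esp down
+ * to the 8KB stack boundary (andl $0xffffe000) and compares the supplied
+ * address against its addr_limit field (offset 12, defined as addr_limit
+ * below), so no call into C is needed.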
+ */ + +addr_limit = 12 + +.text +.align 4 +.globl __get_user_1 +__get_user_1: + movl %esp,%edx + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +1: movzbl (%eax),%edx + xorl %eax,%eax + ret + +.align 4 +.globl __get_user_2 +__get_user_2: + addl $1,%eax + movl %esp,%edx + jc bad_get_user + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +2: movzwl -1(%eax),%edx + xorl %eax,%eax + ret + +.align 4 +.globl __get_user_4 +__get_user_4: + addl $3,%eax + movl %esp,%edx + jc bad_get_user + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +3: movl -3(%eax),%edx + xorl %eax,%eax + ret + +bad_get_user: + xorl %edx,%edx + movl $-14,%eax + ret + +.section __ex_table,"a" + .long 1b,bad_get_user + .long 2b,bad_get_user + .long 3b,bad_get_user +.previous diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/iodebug.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/iodebug.c new file mode 100644 index 0000000000..701a07fe72 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/iodebug.c @@ -0,0 +1,19 @@ +#include + +void * __io_virt_debug(unsigned long x, const char *file, int line) +{ + if (x < PAGE_OFFSET) { + printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line); + return __va(x); + } + return (void *)x; +} + +unsigned long __io_phys_debug(unsigned long x, const char *file, int line) +{ + if (x < PAGE_OFFSET) { + printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line); + return x; + } + return __pa(x); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/memcpy.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/memcpy.c new file mode 100644 index 0000000000..4cb37b6e50 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/memcpy.c @@ -0,0 +1,19 @@ +#include +#include + +#undef memcpy +#undef memset + +void * memcpy(void * to, const void * from, size_t n) +{ +#ifdef CONFIG_X86_USE_3DNOW + return __memcpy3d(to, from, n); +#else + return __memcpy(to, from, n); +#endif +} + +void * memset(void * s, int c, size_t count) +{ + return __memset(s, c, count); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/mmx.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/mmx.c new file mode 100644 index 0000000000..2e52b5d5af --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/mmx.c @@ -0,0 +1,399 @@ +#include +#include +#include +#include + +#include +#include + + +/* + * MMX 3DNow! library helper functions + * + * To do: + * We can use MMX just for prefetch in IRQ's. This may be a win. + * (reported so on K6-III) + * We should use a better code neutral filler for the short jump + * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? + * We also want to clobber the filler register so we dont get any + * register forwarding stalls on the filler. + * + * Add *user handling. Checksums are not a win with MMX on any CPU + * tested so far for any MMX solution figured. 
+ * + * 22/09/2000 - Arjan van de Ven + * Improved for non-egineering-sample Athlons + * + */ + +void *_mmx_memcpy(void *to, const void *from, size_t len) +{ + void *p; + int i; + + if (in_interrupt()) + return __memcpy(to, from, len); + + p = to; + i = len >> 6; /* len/64 */ + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" /* This set is 28 bytes */ + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + + for(; i>5; i--) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + + for(; i>0; i--) + { + __asm__ __volatile__ ( + " movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* + * Now do the tail of the block + */ + __memcpy(to, from, len&63); + kernel_fpu_end(); + return p; +} + +#ifdef CONFIG_MK7 + +/* + * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and + * other MMX using processors do not. + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/64;i++) + { + __asm__ __volatile__ ( + " movntq %%mm0, (%0)\n" + " movntq %%mm0, 8(%0)\n" + " movntq %%mm0, 16(%0)\n" + " movntq %%mm0, 24(%0)\n" + " movntq %%mm0, 32(%0)\n" + " movntq %%mm0, 40(%0)\n" + " movntq %%mm0, 48(%0)\n" + " movntq %%mm0, 56(%0)\n" + : : "r" (page) : "memory"); + page+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + kernel_fpu_begin(); + + /* maybe the prefetch stuff can go before the expensive fnsave... + * but that is for later. 
-AV + */ + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<(4096-320)/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + for(i=(4096-320)/64; i<4096/64; i++) + { + __asm__ __volatile__ ( + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. 
+ */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +#else + +/* + * Generic MMX implementation without K7 specific streaming + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/128;i++) + { + __asm__ __volatile__ ( + " movq %%mm0, (%0)\n" + " movq %%mm0, 8(%0)\n" + " movq %%mm0, 16(%0)\n" + " movq %%mm0, 24(%0)\n" + " movq %%mm0, 32(%0)\n" + " movq %%mm0, 40(%0)\n" + " movq %%mm0, 48(%0)\n" + " movq %%mm0, 56(%0)\n" + " movq %%mm0, 64(%0)\n" + " movq %%mm0, 72(%0)\n" + " movq %%mm0, 80(%0)\n" + " movq %%mm0, 88(%0)\n" + " movq %%mm0, 96(%0)\n" + " movq %%mm0, 104(%0)\n" + " movq %%mm0, 112(%0)\n" + " movq %%mm0, 120(%0)\n" + : : "r" (page) : "memory"); + page+=128; + } + + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<4096/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + kernel_fpu_end(); +} + + +#endif + +/* + * Favour MMX for page clear and copy. + */ + +static void slow_zero_page(void * page) +{ + int d0, d1; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; stosl" \ + : "=&c" (d0), "=&D" (d1) + :"a" (0),"1" (page),"0" (1024) + :"memory"); +} + +void mmx_clear_page(void * page) +{ + if(in_interrupt()) + slow_zero_page(page); + else + fast_clear_page(page); +} + +static void slow_copy_page(void *to, void *from) +{ + int d0, d1, d2; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; movsl" \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (1024),"1" ((long) to),"2" ((long) from) \ + : "memory"); +} + + +void mmx_copy_page(void *to, void *from) +{ + if(in_interrupt()) + slow_copy_page(to, from); + else + fast_copy_page(to, from); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/old-checksum.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/old-checksum.c new file mode 100644 index 0000000000..ae3a38043a --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/old-checksum.c @@ -0,0 +1,19 @@ +/* + * FIXME: old compatibility stuff, will be removed soon. 
+ */ + +#include + +unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum) +{ + int src_err=0, dst_err=0; + + sum = csum_partial_copy_generic ( src, dst, len, sum, &src_err, &dst_err); + + if (src_err || dst_err) + printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n"); + + return sum; +} + + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/strstr.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/strstr.c new file mode 100644 index 0000000000..a3dafbf59d --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/strstr.c @@ -0,0 +1,31 @@ +#include + +char * strstr(const char * cs,const char * ct) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movl %6,%%edi\n\t" + "repne\n\t" + "scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\tmovl %6,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe\n\t" + "cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "xchgl %%eax,%%esi\n\t" + "incl %%esi\n\t" + "cmpb $0,-1(%%eax)\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "2:" + :"=a" (__res), "=&c" (d0), "=&S" (d1) + :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + :"dx", "di"); +return __res; +} + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/usercopy.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/usercopy.c new file mode 100644 index 0000000000..d81fa81af8 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/lib/usercopy.c @@ -0,0 +1,190 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen + * Copyright 1997 Linus Torvalds + */ +#include +#include +#include + +#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + { + if(n<512) + __copy_user(to,from,n); + else + mmx_copy_user(to,from,n); + } + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + { + if(n<512) + __copy_user_zeroing(to,from,n); + else + mmx_copy_user_zeroing(to, from, n); + } + else + memset(to, 0, n); + return n; +} + +#else + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __copy_user(to,from,n); + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + prefetchw(to); + if (access_ok(VERIFY_READ, from, n)) + __copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +#endif + +/* + * Copy a null terminated string from userspace. 
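+ *
+ * Returns the length of the copied string (excluding the terminating NUL),
+ * count if the source was not NUL-terminated within count bytes, or
+ * -EFAULT if a fault occurred while reading the source.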
+ */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +long +__strncpy_from_user(char *dst, const char *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} + +long +strncpy_from_user(char *dst, const char *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} + + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +unsigned long +clear_user(void *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} + +unsigned long +__clear_user(void *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ + +long strnlen_user(const char *s, long n) +{ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/Makefile new file mode 100644 index 0000000000..9ab0821b2e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the linux i386-specific parts of the memory manager. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +O_TARGET := mm.o + +obj-y := init.o fault.o extable.o pageattr.o hypervisor.o get_unmapped_area.o mmu_context.o + +export-objs := pageattr.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/extable.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/extable.c new file mode 100644 index 0000000000..4cd9f064c3 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/extable.c @@ -0,0 +1,62 @@ +/* + * linux/arch/i386/mm/extable.c + */ + +#include +#include +#include +#include + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long +search_one_table(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + long diff; + + mid = (last - first) / 2 + first; + diff = mid->insn - value; + if (diff == 0) + return mid->fixup; + else if (diff < 0) + first = mid+1; + else + last = mid-1; + } + return 0; +} + +extern spinlock_t modlist_lock; + +unsigned long +search_exception_table(unsigned long addr) +{ + unsigned long ret = 0; + +#ifndef CONFIG_MODULES + /* There is only the kernel to search. */ + ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); + return ret; +#else + unsigned long flags; + /* The kernel is the last "module" -- no need to treat it special. */ + struct module *mp; + + spin_lock_irqsave(&modlist_lock, flags); + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) + continue; + ret = search_one_table(mp->ex_table_start, + mp->ex_table_end - 1, addr); + if (ret) + break; + } + spin_unlock_irqrestore(&modlist_lock, flags); + return ret; +#endif +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/fault.c new file mode 100644 index 0000000000..fb11ea03c4 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/fault.c @@ -0,0 +1,390 @@ +/* + * linux/arch/i386/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ + +#include +#include +#include +#include + +extern void die(const char *,struct pt_regs *,long); + +pgd_t *cur_pgd; + +/* + * Ugly, ugly, but the goto's result in better assembly.. 
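+ *
+ * __verify_write() walks the VMAs covering [addr, addr+size) and forces
+ * every page in the range to be faulted in writable, returning 1 on
+ * success and 0 on failure.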
+ */ +int __verify_write(const void * addr, unsigned long size) +{ + struct vm_area_struct * vma; + unsigned long start = (unsigned long) addr; + + if (!size) + return 1; + + vma = find_vma(current->mm, start); + if (!vma) + goto bad_area; + if (vma->vm_start > start) + goto check_stack; + +good_area: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + size--; + size += start & ~PAGE_MASK; + size >>= PAGE_SHIFT; + start &= PAGE_MASK; + + for (;;) { + survive: + { + int fault = handle_mm_fault(current->mm, vma, start, 1); + if (!fault) + goto bad_area; + if (fault < 0) + goto out_of_memory; + } + if (!size) + break; + size--; + start += PAGE_SIZE; + if (start < vma->vm_end) + continue; + vma = vma->vm_next; + if (!vma || vma->vm_start != start) + goto bad_area; + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area;; + } + return 1; + +check_stack: + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, start) == 0) + goto good_area; + +bad_area: + return 0; + +out_of_memory: + if (current->pid == 1) { + yield(); + goto survive; + } + goto bad_area; +} + +extern spinlock_t timerlist_lock; + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out (timerlist_lock is acquired through the + * console unblank code) + */ +void bust_spinlocks(int yes) +{ + spin_lock_init(&timerlist_lock); + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; + } +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * bit 0 == 0 means no page found, 1 means protection fault + * bit 1 == 0 means read, 1 means write + * bit 2 == 0 means kernel, 1 means user-mode + */ +asmlinkage void do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long page; + unsigned long fixup; + int write; + siginfo_t info; + + /* Set the "privileged fault" bit to something sane. */ + error_code &= 3; + error_code |= (regs->xcs & 2) << 1; + +#if PT_UPDATE_DEBUG > 0 + if ( (error_code == 0) && (address >= TASK_SIZE) ) + { + unsigned long paddr = __pa(address); + int i; + for ( i = 0; i < pt_update_queue_idx; i++ ) + { + if ( update_debug_queue[i].ptr == paddr ) + { + printk("XXX now(EIP=%08lx:ptr=%08lx) " + "then(%s/%d:p/v=%08lx/%08lx)\n", + regs->eip, address, + update_debug_queue[i].file, + update_debug_queue[i].line, + update_debug_queue[i].ptr, + update_debug_queue[i].val); + } + } + } +#endif + + if ( flush_page_update_queue() != 0 ) return; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. 
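+ *
+ * (Bit 2 of error_code was reconstructed from regs->xcs near the top of
+ * this function, so the usual user/kernel distinction still applies here.)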
+ */ + if (address >= TASK_SIZE && !(error_code & 5)) + goto vmalloc_fault; + + mm = tsk->mm; + info.si_code = SEGV_MAPERR; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (error_code & 4) { + /* + * accessing the stack below %esp is always a bug. + * The "+ 32" is there due to some instructions (like + * pusha) doing post-decrement on the stack and that + * doesn't show up until later.. + */ + if (address + 32 < regs->esp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ + /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + write++; + break; + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case 1: + tsk->min_flt++; + break; + case 2: + tsk->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void *)address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->eip)) != 0) { + regs->eip = fixup; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n",address); + printk(" printing eip:\n"); + printk("%08lx\n", regs->eip); + page = ((unsigned long *) cur_pgd)[address >> 22]; + printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); + if (page & 1) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = machine_to_phys(page); + page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; + printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, + machine_to_phys(page)); + } + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. 
+ */ +out_of_memory: + if (tsk->pid == 1) { + yield(); + goto survive; + } + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", tsk->comm); + if (error_code & 4) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + force_sig_info(SIGBUS, &info, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & 4)) + goto no_context; + return; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = __pgd_offset(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = offset + cur_pgd; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + XENO_flush_page_update_queue(); /* flush PMD update */ + + pte_k = pte_offset(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/get_unmapped_area.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/get_unmapped_area.c new file mode 100644 index 0000000000..a7b4447589 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/get_unmapped_area.c @@ -0,0 +1,137 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* +static int direct_mapped(unsigned long addr) +{ + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + if(node->addr == addr) + break; + curr = curr->next; + } + + if(curr == direct_list) + return 0; + + return 1; +} +*/ +/* +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct vm_area_struct *vma; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(current->mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + addr = PAGE_ALIGN(TASK_UNMAPPED_BASE); + + for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { + if (TASK_SIZE - len < addr) + return -ENOMEM; + + if ((!vma || addr + len <= vma->vm_start) && !direct_mapped(addr)) + return addr; + + addr = vma->vm_end; + } +} +*/ +struct list_head *find_direct(struct list_head *list, unsigned long addr) +{ + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + direct_mmap_node_t * node; + + for ( curr = direct_list->next; curr != direct_list; curr = curr->next ) + { + node = list_entry(curr, direct_mmap_node_t, list); + if ( node->vm_start >= addr ) break; + } + + return curr; +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long +addr, unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct vm_area_struct *vma; + direct_mmap_node_t * node; + struct 
list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + if (len > TASK_SIZE) + return -ENOMEM; + + if ( addr ) + { + addr = PAGE_ALIGN(addr); + vma = find_vma(current->mm, addr); + curr = find_direct(direct_list, addr); + node = list_entry(curr, direct_mmap_node_t, list); + if ( (TASK_SIZE - len >= addr) && + (!vma || addr + len <= vma->vm_start) && + ((curr == direct_list) || addr + len <= node->vm_start) ) + return addr; + } + + addr = PAGE_ALIGN(TASK_UNMAPPED_BASE); + + + /* Find first VMA and direct_map nodes with vm_start > addr */ + vma = find_vma(current->mm, addr); + curr = find_direct(direct_list, addr); + node = list_entry(curr, direct_mmap_node_t, list); + + for ( ; ; ) + { + if ( TASK_SIZE - len < addr ) return -ENOMEM; + + if ( vma && ((curr == direct_list) || (vma->vm_start < node->vm_start))) + { + /* Do we fit before VMA node? */ + if ( addr + len <= vma->vm_start ) return addr; + addr = vma->vm_end; + vma = vma->vm_next; + } + else if ( curr != direct_list ) + { + /* Do we fit before direct_map node? */ + if ( addr + len <= node->vm_start) return addr; + addr = node->vm_end; + curr = curr->next; + node = list_entry(curr, direct_mmap_node_t, list); + } + else + { + /* !vma && curr == direct_list */ + return addr; + } + } +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c new file mode 100644 index 0000000000..b051684aa2 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c @@ -0,0 +1,168 @@ +/****************************************************************************** + * xeno/mm/hypervisor.c + * + * Update page tables via the hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include + +#define QUEUE_SIZE 2048 +static page_update_request_t update_queue[QUEUE_SIZE]; +unsigned int pt_update_queue_idx = 0; +#define idx pt_update_queue_idx + +#if PT_UPDATE_DEBUG > 0 +page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}}; +#undef queue_l1_entry_update +#undef queue_l2_entry_update +static void DEBUG_allow_pt_reads(void) +{ + pte_t *pte; + page_update_request_t update; + int i; + for ( i = idx-1; i >= 0; i-- ) + { + pte = update_debug_queue[i].ptep; + if ( pte == NULL ) continue; + update_debug_queue[i].ptep = NULL; + update.ptr = phys_to_machine(__pa(pte)); + update.val = update_debug_queue[i].pteval; + HYPERVISOR_pt_update(&update, 1); + } +} +static void DEBUG_disallow_pt_read(unsigned long pa) +{ + pte_t *pte; + pmd_t *pmd; + pgd_t *pgd; + unsigned long pteval; + /* + * We may fault because of an already outstanding update. + * That's okay -- it'll get fixed up in the fault handler. + */ + page_update_request_t update; + unsigned long va = (unsigned long)__va(pa); + pgd = pgd_offset_k(va); + pmd = pmd_offset(pgd, va); + pte = pte_offset(pmd, va); + update.ptr = phys_to_machine(__pa(pte)); + pteval = *(unsigned long *)pte; + update.val = pteval & ~_PAGE_PRESENT; + HYPERVISOR_pt_update(&update, 1); + update_debug_queue[idx].ptep = pte; + update_debug_queue[idx].pteval = pteval; +} +#endif + +#if PT_UPDATE_DEBUG > 1 +#undef queue_pt_switch +#undef queue_tlb_flush +#undef queue_invlpg +#undef queue_pgd_pin +#undef queue_pgd_unpin +#undef queue_pte_pin +#undef queue_pte_unpin +#endif + + +/* + * This is the current pagetable base pointer, which is updated + * on context switch. 
+ */ +unsigned long pt_baseptr; + +void _flush_page_update_queue(void) +{ + if ( idx == 0 ) return; +#if PT_UPDATE_DEBUG > 1 + printk("Flushing %d entries from pt update queue\n", idx); +#endif +#if PT_UPDATE_DEBUG > 0 + DEBUG_allow_pt_reads(); +#endif + HYPERVISOR_pt_update(update_queue, idx); + idx = 0; +} + +static void increment_index(void) +{ + if ( ++idx == QUEUE_SIZE ) _flush_page_update_queue(); +} + +void queue_l1_entry_update(unsigned long ptr, unsigned long val) +{ +#if PT_UPDATE_DEBUG > 0 + DEBUG_disallow_pt_read(ptr); +#endif + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); +} + +void queue_l2_entry_update(unsigned long ptr, unsigned long val) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); +} + +void queue_pt_switch(unsigned long ptr) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_NEW_BASEPTR; + increment_index(); +} + +void queue_tlb_flush(void) +{ + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_TLB_FLUSH; + increment_index(); +} + +void queue_invlpg(unsigned long ptr) +{ + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = ptr & PAGE_MASK; + update_queue[idx].val |= PGEXT_INVLPG; + increment_index(); +} + +void queue_pgd_pin(unsigned long ptr) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_PIN_L2_TABLE; + increment_index(); +} + +void queue_pgd_unpin(unsigned long ptr) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_UNPIN_TABLE; + increment_index(); +} + +void queue_pte_pin(unsigned long ptr) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_PIN_L1_TABLE; + increment_index(); +} + +void queue_pte_unpin(unsigned long ptr) +{ + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_UNPIN_TABLE; + increment_index(); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c new file mode 100644 index 0000000000..eb24b1ccbf --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c @@ -0,0 +1,396 @@ +/* + * linux/arch/i386/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_INITRD +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +mmu_gather_t mmu_gathers[NR_CPUS]; +unsigned long highstart_pfn, highend_pfn; +static unsigned long totalram_pages; +static unsigned long totalhigh_pages; + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + if(pgtable_cache_size > high) { + do { + if (!QUICKLIST_EMPTY(pgd_quicklist)) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (!QUICKLIST_EMPTY(pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } + } while(pgtable_cache_size > low); + } + return freed; +} + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + 
int highmem = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageHighMem(mem_map+i)) + highmem++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); +} + +/* References to section boundaries */ + +extern char _text, _etext, _edata, __bss_start, _end; +extern char __init_begin, __init_end; + +static inline void set_pte_phys (unsigned long vaddr, + unsigned long phys, pgprot_t flags) +{ + pgprot_t prot; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = init_mm.pgd + __pgd_offset(vaddr); + if (pgd_none(*pgd)) { + printk("PAE BUG #00!\n"); + return; + } + pmd = pmd_offset(pgd, vaddr); + if (pmd_none(*pmd)) { + printk("PAE BUG #01!\n"); + return; + } + pte = pte_offset(pmd, vaddr); +#if 0 // XXX Xen ????? + /* stored as-is, to permit clearing entries */ + set_pte(pte, mk_pte_phys(phys, flags)); +#endif + if (pte_val(*pte)) + pte_ERROR(*pte); + + pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags); + + /* We queue directly, avoiding hidden phys->machine translation. */ + queue_l1_entry_update(__pa(pte), phys | pgprot_val(prot)); + + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +void __set_fixmap (enum fixed_addresses idx, unsigned long phys, + pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid __set_fixmap\n"); + return; + } + set_pte_phys(address, phys, flags); +} + +static void __init fixrange_init (unsigned long start, + unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd, *kpgd; + pmd_t *pmd, *kpmd; + pte_t *pte, *kpte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { +#if CONFIG_X86_PAE + if (pgd_none(*pgd)) { + pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); + if (pmd != pmd_offset(pgd, 0)) + printk("PAE BUG #02!\n"); + } + pmd = pmd_offset(pgd, vaddr); +#else + pmd = (pmd_t *)pgd; +#endif + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + clear_page(pte); + kpgd = pgd_offset_k((unsigned long)pte); + kpmd = pmd_offset(kpgd, (unsigned long)pte); + kpte = pte_offset(kpmd, (unsigned long)pte); + queue_l1_entry_update(__pa(kpte), + (*(unsigned long *)kpte)&~_PAGE_RW); + + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); + + // XXX Xen below reqd ? 
+ //if (pte != pte_offset(pmd, 0)) + // BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } + + XENO_flush_page_update_queue(); +} + + +static void __init zone_sizes_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned int max_dma, high, low; + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - low; +#endif + } + free_area_init(zones_size); +} + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + unsigned long vaddr; + + zone_sizes_init(); + + /* + * Fixed mappings, only the page table structure has to be created - + * mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); + + /* + * XXX We do this conversion early, so that all other page tables + * will automatically get this mapping. + */ + set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring); + +#ifdef CONFIG_HIGHMEM +#error + kmap_init(); +#endif +} + +static inline int page_is_ram (unsigned long pagenr) +{ + return 1; +} + +#ifdef CONFIG_HIGHMEM +void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +{ + if (!page_is_ram(pfn)) { + SetPageReserved(page); + return; + } + + if (bad_ppro && page_kills_ppro(pfn)) { + SetPageReserved(page); + return; + } + + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + totalhigh_pages++; +} +#endif /* CONFIG_HIGHMEM */ + +static void __init set_max_mapnr_init(void) +{ +#ifdef CONFIG_HIGHMEM + highmem_start_page = mem_map + highstart_pfn; + max_mapnr = num_physpages = highend_pfn; + num_mappedpages = max_low_pfn; +#else + max_mapnr = num_mappedpages = num_physpages = max_low_pfn; +#endif +} + +static int __init free_pages_init(void) +{ + int bad_ppro, reservedpages, pfn; + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + for (pfn = 0; pfn < max_low_pfn; pfn++) { + /* + * Only count reserved RAM pages + */ + if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) + reservedpages++; + } +#ifdef CONFIG_HIGHMEM + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) + one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); + totalram_pages += totalhigh_pages; +#endif + return reservedpages; +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + + if (!mem_map) + BUG(); + + set_max_mapnr_init(); + + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + reservedpages = free_pages_init(); + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << 
(PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + + boot_cpu_data.wp_works_ok = 1; +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)(&__init_begin); + for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; + return; +} + +#if defined(CONFIG_X86_PAE) +struct kmem_cache_s *pae_pgd_cachep; +void __init pgtable_cache_init(void) +{ + /* + * PAE pgds must be 16-byte aligned: + */ + pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); + if (!pae_pgd_cachep) + panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); +} +#endif /* CONFIG_X86_PAE */ diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/mmu_context.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/mmu_context.c new file mode 100644 index 0000000000..b8f41fb269 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/mmu_context.c @@ -0,0 +1,26 @@ + +#include +#include + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->context.direct_list); + return 0; +} + +/* just free all elements of list identifying directly mapped areas */ +void destroy_context(struct mm_struct *mm) +{ + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = &mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + curr = curr->next; + list_del(&node->list); + kfree(node); + } + +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/pageattr.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/pageattr.c new file mode 100644 index 0000000000..8d3b3e3856 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/pageattr.c @@ -0,0 +1,175 @@ +/* + * Copyright 2002 Andi Kleen, SuSE Labs. + * Thanks to Ben LaHaise for precious feedback. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/* Should move most of this stuff into the appropiate includes */ +#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) +#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) + +static inline pte_t *lookup_address(unsigned long address) +{ + pmd_t *pmd; + pgd_t *pgd = pgd_offset(&init_mm, address); + + if (pgd_none(*pgd)) + return NULL; + if (pgd_val(*pgd) & _PAGE_PSE) + return (pte_t *)pgd; + pmd = pmd_offset(pgd, address); + if (pmd_none(*pmd)) + return NULL; + if (pmd_val(*pmd) & _PAGE_PSE) + return (pte_t *)pmd; + return pte_offset(pmd, address); +} + +static struct page *split_large_page(unsigned long address, pgprot_t prot) +{ + int i; + unsigned long addr; + struct page *base = alloc_pages(GFP_KERNEL, 0); + pte_t *pbase; + if (!base) + return NULL; + address = __pa(address); + addr = address & LARGE_PAGE_MASK; + pbase = (pte_t *)page_address(base); + for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { + pbase[i] = mk_pte_phys(addr, + addr == address ? prot : PAGE_KERNEL); + } + return base; +} + +static void flush_kernel_map(void * address) +{ + if (!test_bit(X86_FEATURE_SELFSNOOP, boot_cpu_data.x86_capability)) { + /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ + if (boot_cpu_data.x86_model >= 4) + asm volatile("wbinvd":::"memory"); + } + + /* Do global flush here to work around large page flushing errata + in some early Athlons */ + __flush_tlb_all(); +} + +static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) +{ + set_pte_atomic(kpte, pte); /* change init_mm */ +#ifndef CONFIG_X86_PAE + { + struct list_head *l; + spin_lock(&mmlist_lock); + list_for_each(l, &init_mm.mmlist) { + struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist); + pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address); + set_pte_atomic((pte_t *)pmd, pte); + } + spin_unlock(&mmlist_lock); + } +#endif +} + +/* no more special protections in this 2/4MB area - revert to a + large page again. */ +static inline void revert_page(struct page *kpte_page, unsigned long address) +{ + pte_t *linear = (pte_t *) + pmd_offset(pgd_offset(&init_mm, address), address); + set_pmd_pte(linear, address, + mk_pte_phys(__pa(address & LARGE_PAGE_MASK), + MAKE_GLOBAL(_KERNPG_TABLE|_PAGE_PSE))); +} + +/* + * Change the page attributes of an page in the linear mapping. + * + * This should be used when a page is mapped with a different caching policy + * than write-back somewhere - some CPUs do not like it when mappings with + * different caching policies exist. This changes the page attributes of the + * in kernel linear mapping too. + * + * The caller needs to ensure that there are no conflicting mappings elsewhere. + * This function only deals with the kernel linear map. + * When page is in highmem it must never be kmap'ed. 
+ */ +static int +__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) +{ + pte_t *kpte; + unsigned long address; + struct page *kpte_page; + +#ifdef CONFIG_HIGHMEM + if (page >= highmem_start_page) + BUG(); +#endif + address = (unsigned long)page_address(page); + kpte = lookup_address(address); + if (!kpte) + return -EINVAL; + kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { + if ((pte_val(*kpte) & _PAGE_PSE) == 0) { + pte_t old = *kpte; + pte_t standard = mk_pte(page, PAGE_KERNEL); + + set_pte_atomic(kpte, mk_pte(page, prot)); + if (pte_same(old,standard)) + atomic_inc(&kpte_page->count); + } else { + struct page *split = split_large_page(address, prot); + if (!split) + return -ENOMEM; + atomic_inc(&kpte_page->count); + set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL)); + } + } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { + set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); + atomic_dec(&kpte_page->count); + } + + if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) { + *oldpage = kpte_page; + revert_page(kpte_page, address); + } + return 0; +} + +int change_page_attr(struct page *page, int numpages, pgprot_t prot) +{ + int err = 0; + struct page *fpage; + int i; + + down_write(&init_mm.mmap_sem); + for (i = 0; i < numpages; i++, page++) { + fpage = NULL; + err = __change_page_attr(page, prot, &fpage); + if (err) + break; + if (fpage || i == numpages-1) { + void *address = page_address(page); +#ifdef CONFIG_SMP + smp_call_function(flush_kernel_map, address, 1, 1); +#endif + flush_kernel_map(address); + if (fpage) + __free_page(fpage); + } + } + up_write(&init_mm.mmap_sem); + return err; +} + +EXPORT_SYMBOL(change_page_attr); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/vmlinux.lds b/xenolinux-2.4.21-pre4-sparse/arch/xeno/vmlinux.lds new file mode 100644 index 0000000000..7c4c4f8e9c --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/vmlinux.lds @@ -0,0 +1,82 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0xC0000000 + 0x000000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
 */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
--
cgit v1.2.3
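The exception-table lookup added in mm/extable.c is a plain binary search over a table sorted by faulting instruction address, where each entry pairs an instruction address with the address of its fixup code. Below is a minimal user-space sketch of the same search; the entry type and the sample table are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

/* Simplified stand-in for struct exception_table_entry. */
struct ex_entry {
    unsigned long insn;   /* address of the instruction that may fault */
    unsigned long fixup;  /* address to resume at if that insn faults  */
};

/* Same shape as search_one_table(): binary search, 0 means "no fixup". */
static unsigned long search_table(const struct ex_entry *first,
                                  const struct ex_entry *last,
                                  unsigned long value)
{
    while (first <= last) {
        const struct ex_entry *mid = first + (last - first) / 2;
        long diff = (long)(mid->insn - value);
        if (diff == 0)
            return mid->fixup;
        else if (diff < 0)
            first = mid + 1;
        else
            last = mid - 1;
    }
    return 0;
}

int main(void)
{
    /* Hypothetical, already-sorted table. */
    struct ex_entry table[] = {
        { 0x1000, 0x9000 },
        { 0x1040, 0x9010 },
        { 0x10c0, 0x9020 },
    };
    printf("%#lx\n", search_table(table, table + 2, 0x1040)); /* 0x9010 */
    printf("%#lx\n", search_table(table, table + 2, 0x2000)); /* 0      */
    return 0;
}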
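The page-fault handler in mm/fault.c documents the hardware error code as bit 0 = protection fault, bit 1 = write, bit 2 = user mode, but then recomputes bit 2 from the saved code segment because the guest kernel no longer runs in ring 0 under the hypervisor. A small sketch of that fix-up follows; the segment selector values used in main() are invented for illustration.

#include <stdio.h>

/*
 * Decode the x86 page-fault error code as used by do_page_fault():
 *   bit 0: 0 = page not present, 1 = protection violation
 *   bit 1: 0 = read access,      1 = write access
 *   bit 2: 0 = kernel mode,      1 = user mode
 *
 * The patch recomputes bit 2 from the saved code segment instead of
 * trusting the hardware value: (xcs & 2) is set for rings 2/3, and
 * shifting it left by one turns it into the "user mode" bit.  The ring
 * assignments assumed here (guest kernel above ring 0, user in ring 3)
 * follow the patch's comment, not any header definition.
 */
static unsigned long fixup_error_code(unsigned long error_code,
                                      unsigned long xcs)
{
    error_code &= 3;                  /* keep present/write bits only */
    error_code |= (xcs & 2) << 1;     /* synthesise the user-mode bit  */
    return error_code;
}

int main(void)
{
    /* Hypothetical selectors: 0x0819 ends in ring 1, 0x082b in ring 3. */
    printf("kernel write fault: %lu\n", fixup_error_code(2, 0x0819)); /* 2 */
    printf("user write fault:   %lu\n", fixup_error_code(2, 0x082b)); /* 6 */
    return 0;
}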
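The vmalloc_fault path at the end of do_page_fault() repairs faults on kernel virtual addresses by copying the missing top-level entry from the reference page table (init_mm.pgd) into the current one (cur_pgd in this patch) rather than treating the fault as an error. Here is a toy version of that synchronisation; the table layout and the sample entry value are illustrative only.

#include <stdio.h>

#define PGDIR_SHIFT   22
#define PTRS_PER_PGD  1024
#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

/* Toy top-level tables: a "reference" table (init_mm.pgd in the patch)
 * and a per-process table that lazily picks up kernel mappings. */
static unsigned long reference_pgd[PTRS_PER_PGD];
static unsigned long current_pgd[PTRS_PER_PGD];

/* If the reference table has an entry for the faulting kernel address
 * and the current table does not, copy it across instead of oopsing.
 * Returns 0 on success, -1 if even the reference entry is missing. */
static int sync_kernel_pgd(unsigned long address)
{
    int offset = pgd_index(address);

    if (!reference_pgd[offset])
        return -1;                      /* genuinely bad access */
    current_pgd[offset] = reference_pgd[offset];
    return 0;
}

int main(void)
{
    unsigned long vmalloc_addr = 0xd0800000UL;

    reference_pgd[pgd_index(vmalloc_addr)] = 0x12345067UL; /* made-up entry */
    printf("sync: %d, entry now %#lx\n",
           sync_kernel_pgd(vmalloc_addr),
           current_pgd[pgd_index(vmalloc_addr)]);
    return 0;
}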
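The new arch_get_unmapped_area() walks two sorted interval lists at once (the ordinary VMA list and the per-mm list of directly mapped regions) and returns the first address where a request of the given length fits before whichever interval comes next. A simplified sketch of that merge over plain arrays follows; the interval struct stands in for both vm_area_struct and direct_mmap_node_t, and the TASK_SIZE and alignment checks are left out.

#include <stdio.h>

/* Simplified stand-in for both lists: [start, end) intervals, each array
 * sorted by start address. */
struct ival { unsigned long start, end; };

/* Return the first gap of 'len' bytes at or above 'addr', or 0 if the
 * search runs off the end of both lists (the kernel code would keep
 * going until TASK_SIZE and return -ENOMEM on failure). */
static unsigned long find_gap(const struct ival *vmas, int nvmas,
                              const struct ival *direct, int ndirect,
                              unsigned long addr, unsigned long len)
{
    int v = 0, d = 0;

    /* Skip intervals that end at or below the starting address. */
    while (v < nvmas && vmas[v].end <= addr) v++;
    while (d < ndirect && direct[d].end <= addr) d++;

    for (;;) {
        int use_vma = (v < nvmas) &&
                      (d >= ndirect || vmas[v].start < direct[d].start);

        if (use_vma) {
            if (addr + len <= vmas[v].start)
                return addr;          /* fits before the next VMA */
            addr = vmas[v].end;
            v++;
        } else if (d < ndirect) {
            if (addr + len <= direct[d].start)
                return addr;          /* fits before the next direct map */
            addr = direct[d].end;
            d++;
        } else {
            return addr;              /* nothing above us any more */
        }
    }
}

int main(void)
{
    struct ival vmas[]   = { { 0x1000, 0x3000 }, { 0x8000, 0x9000 } };
    struct ival direct[] = { { 0x3000, 0x5000 } };

    /* Expect 0x5000: 0x1000-0x5000 is occupied, 0x5000-0x8000 is free. */
    printf("%#lx\n", find_gap(vmas, 2, direct, 1, 0x1000, 0x2000));
    return 0;
}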
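mm/hypervisor.c replaces direct page-table writes with a fixed-size queue of (pointer, value) requests that is handed to the hypervisor in one batch when it fills up or when a flush is forced. The sketch below mimics that batching in user space, with the hypercall replaced by a stub and the pseudo-physical to machine translation omitted.

#include <stdio.h>

/* Mirrors page_update_request_t: a pointer/value pair handed to the
 * hypervisor.  The hypercall itself is stubbed out here. */
typedef struct { unsigned long ptr, val; } pt_update_t;

#define QUEUE_SIZE 8            /* the patch uses 2048 */
static pt_update_t queue[QUEUE_SIZE];
static unsigned int idx;

/* Stand-in for HYPERVISOR_pt_update(): apply 'count' queued updates. */
static void fake_hypercall(pt_update_t *req, unsigned int count)
{
    (void)req;
    printf("hypercall with %u update(s)\n", count);
}

static void flush_queue(void)
{
    if (idx == 0)
        return;
    fake_hypercall(queue, idx);
    idx = 0;
}

/* Same pattern as queue_l1_entry_update(): enqueue, flush when full.
 * The real code first translates 'ptr' with phys_to_machine(). */
static void queue_pte_update(unsigned long ptr, unsigned long val)
{
    queue[idx].ptr = ptr;
    queue[idx].val = val;
    if (++idx == QUEUE_SIZE)
        flush_queue();
}

int main(void)
{
    unsigned long i;

    for (i = 0; i < 10; i++)
        queue_pte_update(0x1000 * i, i);
    flush_queue();              /* push out the partial batch */
    return 0;
}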
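pageattr.c changes the caching attributes of individual pages in the kernel linear map; when the target still sits inside a 4 MB PSE mapping, split_large_page() materialises 1024 small PTEs, all with the default protection except the one being changed. The sketch below reproduces just that loop; the protection constants are assumptions and mk_pte_phys() is reduced to OR-ing flags into a frame address, so the values are illustrative, not the kernel's.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PTRS_PER_PTE    1024
#define LARGE_PAGE_SIZE (PAGE_SIZE * PTRS_PER_PTE)      /* 4 MB, no PAE */
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE - 1))

/* Toy protection bits standing in for pgprot_t values. */
#define PROT_KERNEL  0x063   /* assumed PAGE_KERNEL-like flags         */
#define PROT_NOCACHE 0x073   /* assumed PAGE_KERNEL_NOCACHE-like flags */

/* Build the 1024 small PTEs that replace one 4 MB mapping, giving the
 * target physical page 'prot' and every other page the default
 * protection, as the loop over pbase[] in split_large_page() does. */
static void build_split_ptes(unsigned long phys, unsigned long prot,
                             unsigned long pte[PTRS_PER_PTE])
{
    unsigned long addr = phys & LARGE_PAGE_MASK;
    int i;

    for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
        pte[i] = addr | (addr == (phys & ~(PAGE_SIZE - 1)) ? prot
                                                           : PROT_KERNEL);
}

int main(void)
{
    static unsigned long pte[PTRS_PER_PTE];

    build_split_ptes(0x00654000, PROT_NOCACHE, pte);
    printf("pte[0x254] = %#lx\n", pte[0x254]);   /* 0x654073: uncached */
    printf("pte[0x255] = %#lx\n", pte[0x255]);   /* 0x655063: default  */
    return 0;
}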