diff options
Diffstat (limited to 'xenolinux-2.4.22-sparse/arch')
42 files changed, 10881 insertions, 0 deletions
diff --git a/xenolinux-2.4.22-sparse/arch/xeno/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/Makefile new file mode 100644 index 0000000000..89777dc9e9 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/Makefile @@ -0,0 +1,107 @@ +# +# xeno/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina <skawina@geocities.com> +# Added '-march' and '-mpreferred-stack-boundary' support +# + +LD=$(CROSS_COMPILE)ld -m elf_i386 +OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S +LDFLAGS=-e stext +LINKFLAGS =-T $(TOPDIR)/arch/xeno/vmlinux.lds $(LDFLAGS) + +CFLAGS += -pipe + +check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) + +# prevent gcc from keeping the stack 16 byte aligned +CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) + +ifdef CONFIG_M686 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUMIII +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUM4 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MK7 +CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4) +endif + +HEAD := arch/xeno/kernel/head.o arch/xeno/kernel/init_task.o + +SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib +SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network +SUBDIRS += arch/xeno/drivers/block arch/xeno/drivers/balloon +ifdef CONFIG_XENO_PRIV +SUBDIRS += arch/xeno/drivers/dom0 +endif + +CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o +CORE_FILES += arch/xeno/drivers/console/con.o +CORE_FILES += 
arch/xeno/drivers/block/blk.o +CORE_FILES += arch/xeno/drivers/network/net.o +ifdef CONFIG_XENO_PRIV +CORE_FILES += arch/xeno/drivers/dom0/dom0.o +endif +CORE_FILES += arch/xeno/drivers/balloon/balloon_driver.o +LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a + +arch/xeno/kernel: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kernel + +arch/xeno/mm: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/mm + +arch/xeno/drivers/console: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/console + +arch/xeno/drivers/network: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/network + +arch/xeno/drivers/block: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/block + +arch/xeno/drivers/dom0: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/dom0 + +arch/xeno/drivers/balloon: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/balloon + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +vmlinux: arch/xeno/vmlinux.lds + +FORCE: ; + +.PHONY: bzImage compressed clean archclean archmrproper archdep + +bzImage: vmlinux + @$(MAKEBOOT) image.gz + +install: bzImage + cp -a arch/$(ARCH)/boot/image.gz ../install/boot/xenolinux.gz + +archclean: + @$(MAKEBOOT) clean + +archmrproper: + +archdep: + @$(MAKEBOOT) dep diff --git a/xenolinux-2.4.22-sparse/arch/xeno/boot/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/boot/Makefile new file mode 100644 index 0000000000..252daf50bf --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/boot/Makefile @@ -0,0 +1,22 @@ +# +# arch/xeno/boot/Makefile +# + +image.gz: image + gzip -f -9 < $< > $@ + +image: $(TOPDIR)/vmlinux + # Guest OS header -- first 8 bytes are identifier 'XenoGues'. + echo -e -n 'XenoGues' >$@ + # Guest OS header -- next 4 bytes are load address (0xC0000000). + echo -e -n '\000\000\000\300' >>$@ + $(OBJCOPY) $< image.body + # Guest OS header is immediately followed by raw OS image. + # Start address must be at byte 0. 
+ cat image.body >>$@ + rm -f image.body + +dep: + +clean: + rm -f image image.gz diff --git a/xenolinux-2.4.22-sparse/arch/xeno/config.in b/xenolinux-2.4.22-sparse/arch/xeno/config.in new file mode 100644 index 0000000000..67bdbb8654 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/config.in @@ -0,0 +1,173 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/config-language.txt. +# +mainmenu_name "Linux Kernel Configuration" + +define_bool CONFIG_XENO y + +define_bool CONFIG_X86 y +define_bool CONFIG_ISA y +define_bool CONFIG_SBUS n + +define_bool CONFIG_UID16 y + +mainmenu_option next_comment +comment 'Privileged guest OS' +bool 'Support for privileged operations (domain 0)' CONFIG_XENO_PRIV +endmenu + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Processor type and features' +choice 'Processor family' \ + "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ + Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ + Pentium-4 CONFIG_MPENTIUM4 \ + Athlon/Duron/K7 CONFIG_MK7" Pentium-Pro + + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y + define_bool CONFIG_X86_CMPXCHG y + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y + define_bool CONFIG_X86_TSC y + +if [ "$CONFIG_M686" = "y" ]; then + define_int 
CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUM4" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 7 +fi +if [ "$CONFIG_MK7" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_USE_3DNOW y +fi + +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + +#bool 'Symmetric multi-processing support' CONFIG_SMP +#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then +# define_bool CONFIG_HAVE_DEC_LOCK y +#fi +endmenu + +mainmenu_option next_comment +comment 'General setup' + +bool 'Networking support' CONFIG_NET + +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +if [ "$CONFIG_PROC_FS" = "y" ]; then + choice 'Kernel core (/proc/kcore) format' \ + "ELF CONFIG_KCORE_ELF \ + A.OUT CONFIG_KCORE_AOUT" ELF +fi +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC + +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + + +# +# Block device driver configuration +# +mainmenu_option next_comment +comment 'Block devices' +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM +bool 'Per partition statistics in /proc/partitions' 
CONFIG_BLK_STATS +bool 'XenoLinux virtual block device support' CONFIG_XENOLINUX_BLOCK +endmenu +define_bool CONFIG_BLK_DEV_IDE_MODES n +define_bool CONFIG_BLK_DEV_HD n + + +mainmenu_option next_comment +comment 'Character devices' + +bool 'Xen console support' CONFIG_XEN_CONSOLE +comment 'The options below are alpha-stage and will probably not work' +bool 'Virtual terminal' CONFIG_VT +if [ "$CONFIG_VT" = "y" ]; then + bool ' Support for console on virtual terminal' CONFIG_VT_CONSOLE + bool ' Support for VGA Video' CONFIG_VGA_CONSOLE + bool ' Support for Dummy Video (for testing)' CONFIG_DUMMY_CONSOLE + bool ' PS/2 mouse (aka "auxiliary device") support' CONFIG_PSMOUSE +fi + +bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS +if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then + int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 +fi + +endmenu + +source fs/Config.in + +mainmenu_option next_comment +comment 'Kernel hacking' + +bool 'Kernel debugging' CONFIG_DEBUG_KERNEL +if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then + bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM + bool ' Debug memory allocations' CONFIG_DEBUG_SLAB + bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Load all symbols for debugging' CONFIG_KALLSYMS + bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER +fi + +source drivers/acpi/Config.in + +endmenu + +source crypto/Config.in +source lib/Config.in diff --git a/xenolinux-2.4.22-sparse/arch/xeno/defconfig b/xenolinux-2.4.22-sparse/arch/xeno/defconfig new file mode 100644 index 0000000000..5e5ee7b3a1 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/defconfig @@ -0,0 +1,319 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_XENO=y +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Privileged guest 
OS +# +CONFIG_XENO_PRIV=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +CONFIG_M686=y +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK7 is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_TSC=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set + +# +# General setup +# +CONFIG_NET=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set + +# +# Appletalk devices +# +# CONFIG_DEV_APPLETALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is 
not set +# CONFIG_CISS_SCSI_TAPE is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_BLK_STATS is not set +CONFIG_XENOLINUX_BLOCK=y +# CONFIG_BLK_DEV_IDE_MODES is not set +# CONFIG_BLK_DEV_HD is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VIDEO_SELECT is not set +CONFIG_DUMMY_CONSOLE=y + + +# +# Character devices +# +CONFIG_XEN_CONSOLE=y +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +# CONFIG_UNIX98_PTYS is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + + +# +# File systems +# +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BEFS_DEBUG is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +# CONFIG_FAT_FS is not set +# CONFIG_MSDOS_FS is not set +# CONFIG_UMSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +# CONFIG_JFS_FS is not set +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +# CONFIG_DEVPTS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW 
is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_TCP is not set +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +CONFIG_ZISOFS_FS=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_XENO_PARTITION=y +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8559-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# 
CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_KALLSYMS=y +# CONFIG_FRAME_POINTER is not set + +# +# Library routines +# +CONFIG_ZLIB_INFLATE=y +# CONFIG_ZLIB_DEFLATE is not set diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/Makefile new file mode 100644 index 0000000000..f780a515e0 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/Makefile @@ -0,0 +1,3 @@ +O_TARGET := balloon_driver.o +obj-y := balloon.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/balloon.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/balloon.c new file mode 100644 index 0000000000..e1a6d30374 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/balloon.c @@ -0,0 +1,282 @@ 
+/****************************************************************************** + * balloon.c + * + * Xeno balloon driver - enables returning/claiming memory to/from xen + * + * Copyright (c) 2003, B Dragovic + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> + +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/smp_lock.h> +#include <linux/pagemap.h> + +#include <asm/hypervisor.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#include "dom_mem_ops.h" + +/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ +#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ +#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ +typedef struct user_balloon_op { + unsigned int op; + unsigned long size; +} user_balloon_op_t; +/* END OF USER DEFINE */ + +/* Dead entry written into ballon-owned entries in the PMT. 
*/ +#define DEAD 0xdeadbeef + +#define BALLOON_ENTRY "balloon" +extern struct proc_dir_entry *xeno_base; + +static struct proc_dir_entry *balloon_pde; +unsigned long credit; + +static inline unsigned long get_ppte(unsigned long addr) +{ + unsigned long ppte; + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + pgd = pgd_offset_k(addr); + + if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); + + pmd = pmd_offset(pgd, addr); + if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); + + ptep = pte_offset(pmd, addr); + ppte = (unsigned long)__pa(ptep); + + return ppte; +} + +/* main function for relinquishing bit of memory */ +static unsigned long inflate_balloon(unsigned long num_pages) +{ + dom_mem_op_t dom_mem_op; + unsigned long *parray; + unsigned long *currp; + unsigned long curraddr; + unsigned long ret = 0; + unsigned long vaddr; + unsigned long i, j; + + parray = (unsigned long *)kmalloc(num_pages * + sizeof(unsigned long), GFP_KERNEL); + currp = parray; + + for ( i = 0; i < num_pages; i++ ) + { + /* try to obtain a free page, has to be done with GFP_ATOMIC + * as we do not want to sleep indefinately. 
+ */ + vaddr = __get_free_page(GFP_ATOMIC); + + /* if allocation fails, free all reserved pages */ + if(!vaddr){ + printk("Unable to inflate balloon by %ld, only %ld pages free.", + num_pages, i); + currp = parray; + for(j = 0; j < i; j++){ + free_page(*currp++); + } + goto cleanup; + } + + *currp++ = vaddr; + } + + + currp = parray; + for ( i = 0; i < num_pages; i++ ) + { + curraddr = *currp; + *currp = virt_to_machine(*currp) >> PAGE_SHIFT; + queue_l1_entry_update(get_ppte(curraddr) | PGREQ_NORMAL, 0); + phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD; + currp++; + } + + XENO_flush_page_update_queue(); + + dom_mem_op.op = BALLOON_INFLATE_OP; + dom_mem_op.u.balloon_inflate.size = num_pages; + dom_mem_op.u.balloon_inflate.pages = parray; + if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages ) + { + printk("Unable to inflate balloon, error %lx\n", ret); + goto cleanup; + } + + credit += num_pages; + ret = num_pages; + + cleanup: + kfree(parray); + + return ret; +} + +/* install new mem pages obtained by deflate_balloon. function walks + * phys->machine mapping table looking for DEAD entries and populates + * them. + */ +static unsigned long process_new_pages(unsigned long * parray, + unsigned long num) +{ + /* currently, this function is rather simplistic as + * it is assumed that domain reclaims only number of + * pages previously released. this is to change soon + * and the code to extend page tables etc. will be + * incorporated here. 
+ */ + + unsigned long tot_pages = start_info.nr_pages; + unsigned long * curr = parray; + unsigned long num_installed; + unsigned long i; + + num_installed = 0; + for ( i = 0; (i < tot_pages) && (num_installed < num); i++ ) + { + if ( phys_to_machine_mapping[i] == DEAD ) + { + printk(KERN_ALERT "bd240 debug: proc_new_pages: i %lx, mpt %lx, %lx\n", i, i << PAGE_SHIFT, get_ppte((unsigned long)__va(i << PAGE_SHIFT)) | PGREQ_NORMAL); + phys_to_machine_mapping[i] = *curr; + queue_l1_entry_update((i << PAGE_SHIFT) | PGREQ_MPT_UPDATE, i); + queue_l1_entry_update( + get_ppte((unsigned long)__va(i << PAGE_SHIFT)) | PGREQ_NORMAL, + ((*curr) << PAGE_SHIFT) | L1_PROT); + + *curr = (unsigned long)__va(i << PAGE_SHIFT); + curr++; + num_installed++; + } + } + + /* now, this is tricky (and will also change for machine addrs that + * are mapped to not previously released addresses). we free pages + * that were allocated by get_free_page (the mappings are different + * now, of course). + */ + curr = parray; + for ( i = 0; i < num_installed; i++ ) + { + free_page(*curr); + curr++; + } + + return num_installed; +} + +unsigned long deflate_balloon(unsigned long num_pages) +{ + dom_mem_op_t dom_mem_op; + unsigned long ret; + unsigned long * parray; + + printk(KERN_ALERT "bd240 debug: deflate balloon called for %lx pages\n", num_pages); + + if ( num_pages > credit ) + { + printk("Can not allocate more pages than previously released.\n"); + return -EAGAIN; + } + + parray = (unsigned long *)kmalloc(num_pages * sizeof(unsigned long), + GFP_KERNEL); + + dom_mem_op.op = BALLOON_DEFLATE_OP; + dom_mem_op.u.balloon_deflate.size = num_pages; + dom_mem_op.u.balloon_deflate.pages = parray; + if((ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages){ + printk("Unable to deflate balloon, error %lx\n", ret); + goto cleanup; + } + + if((ret = process_new_pages(parray, num_pages)) < num_pages){ + printk("Unable to deflate balloon by specified %lx pages, only %lx.\n", + num_pages, ret); + goto 
cleanup; + } + + ret = num_pages; + credit -= num_pages; + + cleanup: + kfree(parray); + + return ret; +} + +static int balloon_write(struct file *file, const char *buffer, + u_long count, void *data) +{ + user_balloon_op_t bop; + + /* Only admin can play with the balloon :) */ + if ( !capable(CAP_SYS_ADMIN) ) + return -EPERM; + + if ( copy_from_user(&bop, buffer, sizeof(bop)) ) + return -EFAULT; + + switch ( bop.op ) + { + case USER_INFLATE_BALLOON: + if ( inflate_balloon(bop.size) < bop.size ) + return -EAGAIN; + break; + + case USER_DEFLATE_BALLOON: + deflate_balloon(bop.size); + break; + + default: + printk("Unknown command to balloon driver."); + return -EFAULT; + } + + return sizeof(bop); +} + +/* + * main balloon driver initialization function. + */ +static int __init init_module(void) +{ + printk(KERN_ALERT "Starting Xeno Balloon driver\n"); + + credit = 0; + + balloon_pde = create_proc_entry(BALLOON_ENTRY, 0600, xeno_base); + if ( balloon_pde == NULL ) + { + printk(KERN_ALERT "Unable to create balloon driver proc entry!"); + return -1; + } + + balloon_pde->write_proc = balloon_write; + + return 0; +} + +static void __exit cleanup_module(void) +{ +} + +module_init(init_module); +module_exit(cleanup_module); + + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/dom_mem_ops.h b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/dom_mem_ops.h new file mode 100644 index 0000000000..c473f193e7 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/balloon/dom_mem_ops.h @@ -0,0 +1,32 @@ +/****************************************************************************** + * dom_mem_ops.h + * + * Header file supporting domain related memory operations. N.B. keep in sync + * with xen version. 
+ * + * Copyright (c) 2003, B Dragovic + */ + +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED) +#define BALLOON_DEFLATE_OP 0 +#define BALLOON_INFLATE_OP 1 + +typedef struct balloon_deflate_op { + unsigned long size; + unsigned long * pages; +} balloon_def_op_t; + +typedef struct balloon_inflate_op { + unsigned long size; + unsigned long * pages; +} balloon_inf_op_t; + +typedef struct dom_mem_ops +{ + unsigned int op; + union + { + balloon_def_op_t balloon_deflate; + balloon_inf_op_t balloon_inflate; + }u; +} dom_mem_op_t; diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/Makefile new file mode 100644 index 0000000000..6423104172 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/Makefile @@ -0,0 +1,3 @@ +O_TARGET := blk.o +obj-y := xl_block.o xl_ide.o xl_scsi.o xl_segment.o xl_segment_proc.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.c new file mode 100644 index 0000000000..743ecd9be0 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.c @@ -0,0 +1,616 @@ +/****************************************************************************** + * xl_block.c + * + * Xenolinux virtual block-device driver. + * + */ + +#include "xl_block.h" +#include <linux/blk.h> +#include <linux/cdrom.h> + +typedef unsigned char byte; /* from linux/ide.h */ + +#define XLBLK_RESPONSE_IRQ _EVENT_BLKDEV +#define DEBUG_IRQ _EVENT_DEBUG + +static blk_ring_t *blk_ring; +static unsigned int resp_cons; /* Response consumer for comms ring. */ +static unsigned int req_prod; /* Private request producer. */ +static xen_disk_info_t xlblk_disk_info; +static int xlblk_control_msg_pending; + +#define RING_FULL (BLK_RING_INC(req_prod) == resp_cons) + +/* + * Request queues with outstanding work, but ring is currently full. 
+ * We need no special lock here, as we always access this with the + * io_request_lock held. We only need a small maximum list. + */ +#define MAX_PENDING 8 +static request_queue_t *pending_queues[MAX_PENDING]; +static int nr_pending; + +static kdev_t sg_dev; +static int sg_operation = -1; +static unsigned long sg_next_sect; +#define DISABLE_SCATTERGATHER() (sg_operation = -1) + +static inline void signal_requests_to_xen(void) +{ + DISABLE_SCATTERGATHER(); + blk_ring->req_prod = req_prod; + HYPERVISOR_block_io_op(); +} + + +inline kdev_t physdev_to_xldev(unsigned short physdev) +{ + switch (physdev & XENDEV_TYPE_MASK) { + case XENDEV_IDE: + if ( (physdev & XENDEV_IDX_MASK) < XLIDE_DEVS_PER_MAJOR) { + return MKDEV(XLIDE_MAJOR_0, + (physdev & XENDEV_IDX_MASK) << XLIDE_PARTN_SHIFT); + } else if ( (physdev & XENDEV_IDX_MASK) < (XLIDE_DEVS_PER_MAJOR * 2)) { + return MKDEV(XLIDE_MAJOR_1, + (physdev & XENDEV_IDX_MASK) << XLIDE_PARTN_SHIFT); + } + break; + case XENDEV_SCSI: + return MKDEV(XLSCSI_MAJOR, + (physdev & XENDEV_IDX_MASK) << XLSCSI_PARTN_SHIFT); + case XENDEV_VIRTUAL: + return MKDEV(XLVIRT_MAJOR, + (physdev & XENDEV_IDX_MASK) << XLVIRT_PARTN_SHIFT); + } + + return 0; +} + + +/* Convert from a XenoLinux major device to the Xen-level 'physical' device */ +inline unsigned short xldev_to_physdev(kdev_t xldev) +{ + unsigned short physdev = 0; + + switch ( MAJOR(xldev) ) + { + case XLIDE_MAJOR_0: + physdev = XENDEV_IDE + (0*XLIDE_DEVS_PER_MAJOR) + + (MINOR(xldev) >> XLIDE_PARTN_SHIFT); + break; + + case XLIDE_MAJOR_1: + physdev = XENDEV_IDE + (1*XLIDE_DEVS_PER_MAJOR) + + (MINOR(xldev) >> XLIDE_PARTN_SHIFT); + break; + + case XLSCSI_MAJOR: + physdev = XENDEV_SCSI + (MINOR(xldev) >> XLSCSI_PARTN_SHIFT); + break; + + case XLVIRT_MAJOR: + physdev = XENDEV_VIRTUAL + (MINOR(xldev) >> XLVIRT_PARTN_SHIFT); + break; + } + + return physdev; +} + + +static inline struct gendisk *xldev_to_gendisk(kdev_t xldev) +{ + struct gendisk *gd = NULL; + + switch ( MAJOR(xldev) ) + { + case 
XLIDE_MAJOR_0: + gd = xlide_gendisk[0]; + break; + + case XLIDE_MAJOR_1: + gd = xlide_gendisk[1]; + break; + + case XLSCSI_MAJOR: + gd = xlscsi_gendisk; + break; + + case XLVIRT_MAJOR: + gd = xlsegment_gendisk; + break; + } + + if ( gd == NULL ) BUG(); + + return gd; +} + + +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) +{ + struct gendisk *gd = xldev_to_gendisk(xldev); + return (xl_disk_t *)gd->real_devices + + (MINOR(xldev) >> PARTN_SHIFT(xldev)); +} + + +int xenolinux_block_open(struct inode *inode, struct file *filep) +{ + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + disk->usage++; + DPRINTK("xenolinux_block_open\n"); + return 0; +} + + +int xenolinux_block_release(struct inode *inode, struct file *filep) +{ + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + disk->usage--; + DPRINTK("xenolinux_block_release\n"); + return 0; +} + +/* + * handle ioctl calls + * + * individual ioctls are defined in /usr/include/linux/fs.h + */ + +int xenolinux_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + kdev_t dev = inode->i_rdev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + struct gendisk *gd; + struct hd_struct *part; + + DPRINTK("xenolinux_block_ioctl\n"); + + /* check permissions */ + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!inode) return -EINVAL; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long) argument, dev); + + gd = xldev_to_gendisk(dev); + part = &gd->part[MINOR(dev)]; + + switch ( command ) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + return put_user(part->nr_sects, (unsigned long *) argument); + + case BLKRRPART: /* re-read partition table */ + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + return xenolinux_block_revalidate(dev); + + case BLKSSZGET: + switch ( MAJOR(dev) ) + { + case XLIDE_MAJOR_0: + DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlide_hwsect(MINOR(dev))); + 
return xlide_hwsect(MINOR(dev)); + + case XLSCSI_MAJOR: + DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlscsi_hwsect(MINOR(dev))); + return xlscsi_hwsect(MINOR(dev)); + + case XLVIRT_MAJOR: + DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlsegment_hwsect(MINOR(dev))); + return xlsegment_hwsect(MINOR(dev)); + + default: + printk(KERN_ALERT "BLKSSZGET ioctl() on bogus disk!\n"); + return 0; + } + + case BLKBSZGET: /* get block size */ + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); + break; + + case BLKBSZSET: /* set block size */ + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); + break; + + case BLKRASET: /* set read-ahead */ + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); + break; + + case BLKRAGET: /* get read-ahead */ + DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); + break; + + case HDIO_GETGEO: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + return 0; + + case CDROMMULTISESSION: + printk("FIXME: support multisession CDs later\n"); + memset((struct cdrom_multisession *)argument, 0, + sizeof(struct cdrom_multisession)); + return 0; + + default: + printk("ioctl %08x not supported by xl_block\n", command); + return -ENOSYS; + } + + return 0; +} + +int xenolinux_block_check(kdev_t 
dev) +{ + DPRINTK("xenolinux_block_check\n"); + return 0; +} + +int xenolinux_block_revalidate(kdev_t dev) +{ + struct gendisk *gd = xldev_to_gendisk(dev); + xl_disk_t *disk = xldev_to_xldisk(dev); + unsigned long flags; + int i, partn_shift = PARTN_SHIFT(dev); + int xdev = dev & XENDEV_IDX_MASK; + + DPRINTK("xenolinux_block_revalidate: %d %d %d\n", + dev, xdev, XENDEV_IDX_MASK); + + spin_lock_irqsave(&io_request_lock, flags); + if ( disk->usage > 1 ) + { + spin_unlock_irqrestore(&io_request_lock, flags); + return -EBUSY; + } + spin_unlock_irqrestore(&io_request_lock, flags); + + for ( i = 0; i < (1 << partn_shift); i++ ) + { + invalidate_device(xdev + i, 1); + gd->part[xdev + i].start_sect = 0; + gd->part[xdev + i].nr_sects = 0; + } + + grok_partitions(gd, MINOR(dev) >> partn_shift, + 1 << partn_shift, disk->capacity); + + return 0; +} + + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_{READ,WRITE,PROBE*,SEG*} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. 
+ */ +static int hypervisor_request(unsigned long id, + int operation, + char * buffer, + unsigned long sector_number, + unsigned short nr_sectors, + kdev_t device) +{ + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); + kdev_t phys_device = (kdev_t) 0; + struct gendisk *gd; + blk_ring_req_entry_t *req; + struct buffer_head *bh; + + if ( nr_sectors >= (1<<9) ) BUG(); + if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG(); + + switch ( operation ) + { + case XEN_BLOCK_SEG_CREATE: + case XEN_BLOCK_SEG_DELETE: + case XEN_BLOCK_PHYSDEV_GRANT: + case XEN_BLOCK_PHYSDEV_PROBE: + case XEN_BLOCK_PROBE_BLK: + case XEN_BLOCK_PROBE_SEG: + case XEN_BLOCK_PROBE_SEG_ALL: + if ( RING_FULL ) return 1; + phys_device = (kdev_t) 0; + sector_number = 0; + DISABLE_SCATTERGATHER(); + break; + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + phys_device = xldev_to_physdev(device); + gd = xldev_to_gendisk(device); + + sector_number += gd->part[MINOR(device)].start_sect; + if ( (sg_operation == operation) && + (sg_dev == phys_device) && + (sg_next_sect == sector_number) ) + { + req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req; + bh = (struct buffer_head *)id; + bh->b_reqnext = (struct buffer_head *)req->id; + req->id = id; + req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; + if ( ++req->nr_segments < MAX_BLK_SEGS ) + sg_next_sect += nr_sectors; + else + DISABLE_SCATTERGATHER(); + return 0; + } + else if ( RING_FULL ) + { + return 1; + } + else + { + sg_operation = operation; + sg_dev = phys_device; + sg_next_sect = sector_number + nr_sectors; + } + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure. 
*/ + req = &blk_ring->ring[req_prod].req; + req->id = id; + req->operation = operation; + req->sector_number = sector_number; + req->device = phys_device; + req->nr_segments = 1; + req->buffer_and_sects[0] = buffer_ma | nr_sectors; + req_prod = BLK_RING_INC(req_prod); + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +void do_xlblk_request(request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh, *next_bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); + + while ( !rq->plugged && !list_empty(&rq->queue_head)) + { + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) rw = READ; + if ((rw != READ) && (rw != WRITE)) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + bh = req->bh; + while ( bh != NULL ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + + full = hypervisor_request( + (unsigned long)bh, + (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); + + if ( full ) + { + bh->b_reqnext = next_bh; + pending_queues[nr_pending++] = rq; + if ( nr_pending >= MAX_PENDING ) BUG(); + goto out; + } + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + bh = req->bh = next_bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. 
*/ + if ( end_that_request_first(req, 1, "XenBlk") ) BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) signal_requests_to_xen(); +} + + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + int i; + unsigned long flags; + struct buffer_head *bh, *next_bh; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; + i != blk_ring->resp_prod; + i = BLK_RING_INC(i) ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp; + switch (bret->operation) + { + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( bret->status ) + DPRINTK("Bad return from blkdev data request: %lx\n", + bret->status); + for ( bh = (struct buffer_head *)bret->id; + bh != NULL; + bh = next_bh ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, !bret->status); + } + break; + + case XEN_BLOCK_SEG_CREATE: + case XEN_BLOCK_SEG_DELETE: + case XEN_BLOCK_PROBE_SEG: + case XEN_BLOCK_PROBE_SEG_ALL: + case XEN_BLOCK_PROBE_BLK: + case XEN_BLOCK_PHYSDEV_GRANT: + case XEN_BLOCK_PHYSDEV_PROBE: + xlblk_control_msg_pending = bret->status; + break; + + default: + BUG(); + } + } + + resp_cons = i; + + /* We kick pending request queues if the ring is reasonably empty. */ + if ( (nr_pending != 0) && + (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) < + (BLK_RING_SIZE >> 1)) ) + { + /* Attempt to drain the queue, but bail if the ring becomes full. */ + while ( nr_pending != 0 ) + { + do_xlblk_request(pending_queues[--nr_pending]); + if ( RING_FULL ) break; + } + } + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +/* Send a synchronous message to Xen. */ +int xenolinux_control_msg(int operation, char *buffer, int size) +{ + unsigned long flags; + char *aligned_buf; + + /* We copy from an aligned buffer, as interface needs sector alignment. 
*/ + aligned_buf = (char *)get_free_page(GFP_KERNEL); + if ( aligned_buf == NULL ) BUG(); + memcpy(aligned_buf, buffer, size); + + xlblk_control_msg_pending = 2; + spin_lock_irqsave(&io_request_lock, flags); + /* Note that size gets rounded up to a sector-sized boundary. */ + if ( hypervisor_request(0, operation, aligned_buf, 0, (size+511)/512, 0) ) + return -EAGAIN; + signal_requests_to_xen(); + spin_unlock_irqrestore(&io_request_lock, flags); + while ( xlblk_control_msg_pending == 2 ) barrier(); + + memcpy(buffer, aligned_buf, size); + free_page((unsigned long)aligned_buf); + + return xlblk_control_msg_pending ? -EINVAL : 0; +} + + +int __init xlblk_init(void) +{ + int error; + + xlblk_control_msg_pending = 0; + nr_pending = 0; + + /* This mapping was created early at boot time. */ + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + + error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, + SA_SAMPLE_RANDOM, "blkdev", NULL); + if ( error ) + { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + /* Probe for disk information. */ + memset(&xlblk_disk_info, 0, sizeof(xlblk_disk_info)); + error = xenolinux_control_msg(XEN_BLOCK_PROBE_BLK, + (char *)&xlblk_disk_info, + sizeof(xen_disk_info_t)); + if ( error ) + { + printk(KERN_ALERT "Could not probe disks (%d)\n", error); + free_irq(XLBLK_RESPONSE_IRQ, NULL); + goto fail; + } + + /* Pass the information to our fake IDE and SCSI susbystems. 
*/ + xlide_init(&xlblk_disk_info); + xlscsi_init(&xlblk_disk_info); + + return 0; + + fail: + return error; +} + +static void __exit xlblk_cleanup(void) +{ + xlide_cleanup(); + xlscsi_cleanup(); + free_irq(XLBLK_RESPONSE_IRQ, NULL); +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.h b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.h new file mode 100644 index 0000000000..00056bf09e --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.h @@ -0,0 +1,110 @@ +/****************************************************************************** + * xl_block.h + * + * Shared definitions between all levels of XenoLinux Virtual block devices. + */ + +#ifndef __XL_BLOCK_H__ +#define __XL_BLOCK_H__ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/fs.h> +#include <linux/hdreg.h> +#include <linux/blkdev.h> +#include <linux/major.h> + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> +#include <asm/io.h> +#include <asm/atomic.h> +#include <asm/uaccess.h> + +#if 0 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 0 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK_IOCTL(_f, _a...) ((void)0) +#endif + +/* XL IDE and SCSI use same major/minor numbers as normal Linux devices. 
*/ +#define XLIDE_MAJOR_0 IDE0_MAJOR +#define XLIDE_MAJOR_1 IDE1_MAJOR +#define XLSCSI_MAJOR SCSI_DISK0_MAJOR + +#define XLIDE_PARTN_SHIFT 6 +#define XLSCSI_PARTN_SHIFT 4 +#define XLVIRT_PARTN_SHIFT 4 + +static inline int PARTN_SHIFT(kdev_t dev) +{ + switch ( MAJOR(dev) ) + { + case XLIDE_MAJOR_0: + case XLIDE_MAJOR_1: + return XLIDE_PARTN_SHIFT; + case XLSCSI_MAJOR: + return XLSCSI_PARTN_SHIFT; + case XLVIRT_MAJOR: + return XLVIRT_PARTN_SHIFT; + default: + BUG(); + } +} + +#define XLIDE_DEVS_PER_MAJOR 2 +#define XLSCSI_DEVS_PER_MAJOR 16 +#define XLVIRT_DEVS_PER_MAJOR 16 + +/* + * We have one of these per XL-IDE, XL-SCSI, and XL-VIRT device. + * They hang in an array off the gendisk structure. We may end up putting + * all kinds of interesting stuff here :-) + */ +typedef struct xl_disk { + int usage; + unsigned long capacity; +} xl_disk_t; + +/* Generic layer. */ +extern int xenolinux_control_msg(int operration, char *buffer, int size); +extern int xenolinux_block_open(struct inode *inode, struct file *filep); +extern int xenolinux_block_release(struct inode *inode, struct file *filep); +extern int xenolinux_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument); +extern int xenolinux_block_check(kdev_t dev); +extern int xenolinux_block_revalidate(kdev_t dev); +extern void do_xlblk_request (request_queue_t *rq); + +/* Fake IDE subsystem. */ +extern int xlide_init(xen_disk_info_t *xdi); +extern int xlide_hwsect(int minor); +extern void xlide_cleanup(void); +extern struct gendisk *xlide_gendisk[]; + +/* Fake SCSI subsystem. */ +extern int xlscsi_init(xen_disk_info_t *xdi); +extern int xlscsi_hwsect(int minor); +extern void xlscsi_cleanup(void); +extern struct gendisk *xlscsi_gendisk; + +/* Virtual block-device subsystem. 
*/ +extern int xlsegment_hwsect(int minor); +extern struct gendisk *xlsegment_gendisk; + +extern unsigned short xldev_to_physdev(kdev_t xldev); +extern kdev_t physdev_to_xldev(unsigned short physdev); + +#endif /* __XL_BLOCK_H__ */ diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_ide.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_ide.c new file mode 100644 index 0000000000..714c6bf082 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_ide.c @@ -0,0 +1,204 @@ +/****************************************************************************** + * xl_ide.c + * + * Xenolinux virtual IDE block-device driver. + */ + +#include "xl_block.h" +#include <linux/blk.h> + +#define XLIDE_MAX 256 +#define XLIDE_MAJOR_NAME "hd" +static int xlide_blksize_size[XLIDE_MAX]; +static int xlide_hardsect_size[XLIDE_MAX]; +static int xlide_max_sectors[XLIDE_MAX]; + +#define XLIDE_NR_MAJORS 2 + +struct gendisk *xlide_gendisk[XLIDE_NR_MAJORS] = { NULL }; + +static struct block_device_operations xlide_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + +int xlide_hwsect(int minor) +{ + return xlide_hardsect_size[minor]; +} + +static int get_major(int major) +{ + int r = register_blkdev(major, XLIDE_MAJOR_NAME, &xlide_block_fops); + if ( r < 0 ) + printk (KERN_ALERT "XL IDE: can't get major %d\n", XLIDE_MAJOR_0); + return r; +} + +static void setup_major(struct gendisk **pgd, + xen_disk_info_t *xdi, int base, int major) +{ + int i, minors, disk, units = XLIDE_DEVS_PER_MAJOR; + unsigned short minor; + unsigned char buf[64]; + struct gendisk *gd; + + blk_size[major] = NULL; + blksize_size[major] = xlide_blksize_size + base*(1<<XLIDE_PARTN_SHIFT); + hardsect_size[major] = xlide_hardsect_size + base*(1<<XLIDE_PARTN_SHIFT); + max_sectors[major] = xlide_max_sectors + base*(1<<XLIDE_PARTN_SHIFT); + 
read_ahead[major] = 8; + + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as soon as + * we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); + + /* Construct an appropriate gendisk structure. */ + minors = units * (1<<XLIDE_PARTN_SHIFT); + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc(minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc(minors * sizeof(struct hd_struct), GFP_KERNEL); + gd->major = major; + gd->major_name = XLIDE_MAJOR_NAME; + gd->minor_shift = XLIDE_PARTN_SHIFT; + gd->max_p = 1<<XLIDE_PARTN_SHIFT; + gd->nr_real = units; + gd->real_devices = kmalloc(units * sizeof(xl_disk_t), GFP_KERNEL); + gd->next = NULL; + gd->fops = &xlide_block_fops; + gd->de_arr = kmalloc(sizeof(*gd->de_arr) * units, GFP_KERNEL); + gd->flags = kmalloc(sizeof(*gd->flags) * units, GFP_KERNEL); + memset(gd->sizes, 0, minors * sizeof(int)); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + memset(gd->de_arr, 0, sizeof(*gd->de_arr) * units); + memset(gd->flags, 0, sizeof(*gd->flags) * units); + memset(gd->real_devices, 0, sizeof(xl_disk_t) * units); + *pgd = gd; + add_gendisk(gd); + + /* Now register each disk in turn. */ + for ( i = 0; i < xdi->count; i++ ) + { + disk = xdi->disks[i].device & XENDEV_IDX_MASK; + minor = (disk-base) << XLIDE_PARTN_SHIFT; + + if ( !IS_IDE_XENDEV(xdi->disks[i].device) || + (disk < base) || (disk >= (base + XLIDE_DEVS_PER_MAJOR)) ) + continue; + + ((xl_disk_t *)gd->real_devices)[disk].capacity = + xdi->disks[i].capacity; + + switch (xdi->disks[i].type) + { + case XD_TYPE_CDROM: + set_device_ro(MKDEV(major, minor), 1); + + case XD_TYPE_FLOPPY: + case XD_TYPE_TAPE: + gd->flags[disk] = GENHD_FL_REMOVABLE; + printk(KERN_ALERT "Skipping partition check on %s /dev/%s\n", + xdi->disks[i].type==XD_TYPE_CDROM ? "cdrom" : + (xdi->disks[i].type==XD_TYPE_TAPE ? 
"tape" : "floppy"), + disk_name(gd, minor, buf)); + break; + + case XD_TYPE_DISK: + register_disk(gd, + MKDEV(major, minor), + 1<<XLIDE_PARTN_SHIFT, + &xlide_block_fops, xdi->disks[i].capacity); + break; + + default: + printk(KERN_ALERT "XenoLinux: unknown ide device type %d\n", + xdi->disks[i].type); + break; + } + } + + return; +} + + +int xlide_init(xen_disk_info_t *xdi) +{ + int i, units; + + /* If we don't have any usable IDE devices we may as well bail now. */ + units = 0; + for ( i = 0; i < xdi->count; i++ ) + if ( IS_IDE_XENDEV(xdi->disks[i].device) && + ((xdi->disks[i].device & XENDEV_IDX_MASK) < + (XLIDE_NR_MAJORS*XLIDE_DEVS_PER_MAJOR)) ) + units++; + if ( units == 0 ) return 0; + + SET_MODULE_OWNER(&xlide_block_fops); + + if ( get_major(XLIDE_MAJOR_0) < 0 ) + return 0; + if ( get_major(XLIDE_MAJOR_1) < 0 ) + { + (void)unregister_blkdev(XLIDE_MAJOR_0, XLIDE_MAJOR_NAME); + return 0; + } + + /* Initialize global arrays. */ + for ( i = 0; i < XLIDE_MAX; i++ ) + { + xlide_blksize_size[i] = 512; + xlide_hardsect_size[i] = 512; + xlide_max_sectors[i] = 128; + } + + setup_major(&xlide_gendisk[0], xdi, 0*XLIDE_DEVS_PER_MAJOR, XLIDE_MAJOR_0); + setup_major(&xlide_gendisk[1], xdi, 1*XLIDE_DEVS_PER_MAJOR, XLIDE_MAJOR_1); + + return 0; +} + + +static void cleanup_major(int major) +{ + blk_cleanup_queue(BLK_DEFAULT_QUEUE(major)); + + read_ahead[major] = 0; + + if ( blksize_size[major] != NULL ) + { + kfree(blksize_size[major]); + blksize_size[major] = NULL; + } + + if ( hardsect_size[major] != NULL ) + { + kfree(hardsect_size[major]); + hardsect_size[major] = NULL; + } + + if ( max_sectors[major] != NULL ) + { + kfree(max_sectors[major]); + max_sectors[major] = NULL; + } + + (void)unregister_blkdev(major, XLIDE_MAJOR_NAME); +} + +void xlide_cleanup(void) +{ + if ( xlide_gendisk[0] == NULL ) return; + xlide_gendisk[0] = NULL; + cleanup_major(XLIDE_MAJOR_0); + cleanup_major(XLIDE_MAJOR_1); +} + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_scsi.c 
b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_scsi.c new file mode 100644 index 0000000000..c0f389f181 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_scsi.c @@ -0,0 +1,168 @@ +/****************************************************************************** + * xl_scsi.c + * + * Xenolinux virtual SCSI block-device driver. + */ + +#include "xl_block.h" + +#define MAJOR_NR XLSCSI_MAJOR +#include <linux/blk.h> + +#define XLSCSI_MAX 256 +#define XLSCSI_MAJOR_NAME "sd" +static int xlscsi_blksize_size[XLSCSI_MAX]; +static int xlscsi_hardsect_size[XLSCSI_MAX]; +static int xlscsi_max_sectors[XLSCSI_MAX]; + +struct gendisk *xlscsi_gendisk = NULL; + +static struct block_device_operations xlscsi_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + + +/* tiny inteface fn */ +int xlscsi_hwsect(int minor) +{ + return xlscsi_hardsect_size[minor]; +} + + +int xlscsi_init(xen_disk_info_t *xdi) +{ + int i, result, units, minors, disk; + struct gendisk *gd; + + /* If we don't have any usable SCSI devices we may as well bail now. */ + units = 0; + for ( i = 0; i < xdi->count; i++ ) + if ( IS_SCSI_XENDEV(xdi->disks[i].device) && + ((xdi->disks[i].device & XENDEV_IDX_MASK) < + XLSCSI_DEVS_PER_MAJOR) ) + units++; + if ( units == 0 ) return 0; + + SET_MODULE_OWNER(&xlscsi_block_fops); + + result = register_blkdev(XLSCSI_MAJOR, XLSCSI_MAJOR_NAME, + &xlscsi_block_fops); + if ( result < 0 ) + { + printk (KERN_ALERT "XL SCSI: can't get major %d\n", XLSCSI_MAJOR); + return result; + } + + /* Initialize global arrays. 
*/ + for ( i = 0; i < XLSCSI_MAX; i++ ) + { + xlscsi_blksize_size[i] = 1024; //XXX 512; + xlscsi_hardsect_size[i] = 512; + xlscsi_max_sectors[i] = 128*8; //XXX 128 + } + + blk_size[XLSCSI_MAJOR] = NULL; + blksize_size[XLSCSI_MAJOR] = xlscsi_blksize_size; + hardsect_size[XLSCSI_MAJOR] = xlscsi_hardsect_size; + max_sectors[XLSCSI_MAJOR] = xlscsi_max_sectors; + read_ahead[XLSCSI_MAJOR] = 0; //XXX8; + + blk_init_queue(BLK_DEFAULT_QUEUE(XLSCSI_MAJOR), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(XLSCSI_MAJOR), 0); + + units = XLSCSI_MAX >> XLSCSI_PARTN_SHIFT; + + /* Construct an appropriate gendisk structure. */ + minors = units * (1<<XLSCSI_PARTN_SHIFT); + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc(minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc(minors * sizeof(struct hd_struct), GFP_KERNEL); + gd->major = XLSCSI_MAJOR; + gd->major_name = XLSCSI_MAJOR_NAME; + gd->minor_shift = XLSCSI_PARTN_SHIFT; + gd->max_p = 1<<XLSCSI_PARTN_SHIFT; + gd->nr_real = units; + gd->real_devices = kmalloc(units * sizeof(xl_disk_t), GFP_KERNEL); + gd->next = NULL; + gd->fops = &xlscsi_block_fops; + gd->de_arr = kmalloc(sizeof(*gd->de_arr) * units, GFP_KERNEL); + gd->flags = kmalloc(sizeof(*gd->flags) * units, GFP_KERNEL); + memset(gd->sizes, 0, minors * sizeof(int)); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + memset(gd->de_arr, 0, sizeof(*gd->de_arr) * units); + memset(gd->flags, 0, sizeof(*gd->flags) * units); + memset(gd->real_devices, 0, sizeof(xl_disk_t) * units); + xlscsi_gendisk = gd; + add_gendisk(gd); + + /* Now register each disk in turn. 
*/ + for ( i = 0; i < xdi->count; i++ ) + { + disk = xdi->disks[i].device & XENDEV_IDX_MASK; + + if ( !IS_SCSI_XENDEV(xdi->disks[i].device) || + (disk >= XLSCSI_DEVS_PER_MAJOR) ) + continue; + + ((xl_disk_t *)gd->real_devices)[disk].capacity = + xdi->disks[i].capacity; + register_disk(gd, + MKDEV(XLSCSI_MAJOR, disk<<XLSCSI_PARTN_SHIFT), + 1<<XLSCSI_PARTN_SHIFT, + &xlscsi_block_fops, + xdi->disks[i].capacity); + } + + printk(KERN_ALERT + "XenoLinux Virtual SCSI Device Driver installed [device: %d]\n", + XLSCSI_MAJOR); + + return 0; +} + + +void xlscsi_cleanup(void) +{ + if ( xlscsi_gendisk == NULL ) return; + + blk_cleanup_queue(BLK_DEFAULT_QUEUE(XLSCSI_MAJOR)); + + xlscsi_gendisk = NULL; + + read_ahead[XLSCSI_MAJOR] = 0; + + if ( blksize_size[XLSCSI_MAJOR] != NULL ) + { + kfree(blksize_size[XLSCSI_MAJOR]); + blksize_size[XLSCSI_MAJOR] = NULL; + } + + if ( hardsect_size[XLSCSI_MAJOR] != NULL ) + { + kfree(hardsect_size[XLSCSI_MAJOR]); + hardsect_size[XLSCSI_MAJOR] = NULL; + } + + if ( max_sectors[XLSCSI_MAJOR] != NULL ) + { + kfree(max_sectors[XLSCSI_MAJOR]); + max_sectors[XLSCSI_MAJOR] = NULL; + } + + if ( unregister_blkdev(XLSCSI_MAJOR, XLSCSI_MAJOR_NAME) != 0 ) + { + printk(KERN_ALERT + "XenoLinux Virtual SCSI Device Driver uninstalled w/ errs\n"); + } +} + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment.c new file mode 100644 index 0000000000..e746e2db3d --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment.c @@ -0,0 +1,182 @@ +/****************************************************************************** + * xl_segment.c + * + * Xenolinux virtual block-device driver (xvd). 
+ * + */ + +#include "xl_block.h" + +#define MAJOR_NR XLVIRT_MAJOR +#include <linux/blk.h> + +/* Copied from linux/ide.h */ +typedef unsigned char byte; + +#define XLVIRT_MAX 256 +#define XLVIRT_MAJOR_NAME "xvd" +static int xlseg_blksize_size[XLVIRT_MAX]; +static int xlseg_hardsect_size[XLVIRT_MAX]; +static int xlseg_max_sectors[XLVIRT_MAX]; + +struct gendisk *xlsegment_gendisk = NULL; + +static xen_disk_info_t xlseg_disk_info; + +static struct block_device_operations xlsegment_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + + +int xlsegment_hwsect(int minor) +{ + return xlseg_hardsect_size[minor]; +} + + +int __init xlseg_init(void) +{ + int i, result, units, minors, disk; + xen_disk_info_t *xdi = &xlseg_disk_info; + struct gendisk *gd; + + SET_MODULE_OWNER(&xlsegment_block_fops); + + /* Probe for disk information. */ + memset(xdi, 0, sizeof(*xdi)); + xenolinux_control_msg(XEN_BLOCK_PROBE_SEG, (char *)xdi, sizeof(*xdi)); + + DPRINTK("xvd block device probe:\n"); + for ( i = 0; i < xdi->count; i++ ) + { + DPRINTK(" %2d: device: %d, capacity: %ld\n", + i, xdi->disks[i].device, xdi->disks[i].capacity); + } + + result = register_blkdev(XLVIRT_MAJOR, XLVIRT_MAJOR_NAME, + &xlsegment_block_fops); + if ( result < 0 ) + { + printk(KERN_ALERT "XL Segment: can't get major %d\n", XLVIRT_MAJOR); + return result; + } + + /* Initialize global arrays. */ + for (i = 0; i < XLVIRT_MAX; i++) + { + xlseg_blksize_size[i] = 512; + xlseg_hardsect_size[i] = 512; + xlseg_max_sectors[i] = 128; + } + + blk_size[XLVIRT_MAJOR] = NULL; + blksize_size[XLVIRT_MAJOR] = xlseg_blksize_size; + hardsect_size[XLVIRT_MAJOR] = xlseg_hardsect_size; + max_sectors[XLVIRT_MAJOR] = xlseg_max_sectors; + read_ahead[XLVIRT_MAJOR] = 8; + + blk_init_queue(BLK_DEFAULT_QUEUE(XLVIRT_MAJOR), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. 
We dequeue buffer heads as + * soon as we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(XLVIRT_MAJOR), 0); + + units = XLVIRT_MAX >> XLVIRT_PARTN_SHIFT; + + /* Construct an appropriate gendisk structure. */ + minors = units * (1<<XLVIRT_PARTN_SHIFT); + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc(minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc(minors * sizeof(struct hd_struct), GFP_KERNEL); + gd->major = XLVIRT_MAJOR; + gd->major_name = XLVIRT_MAJOR_NAME; + gd->minor_shift = XLVIRT_PARTN_SHIFT; + gd->max_p = 1<<XLVIRT_PARTN_SHIFT; + gd->nr_real = units; + gd->real_devices = kmalloc(units * sizeof(xl_disk_t), GFP_KERNEL); + gd->next = NULL; + gd->fops = &xlsegment_block_fops; + gd->de_arr = kmalloc(sizeof(*gd->de_arr) * units, GFP_KERNEL); + gd->flags = kmalloc(sizeof(*gd->flags) * units, GFP_KERNEL); + memset(gd->sizes, 0, minors * sizeof(int)); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + memset(gd->de_arr, 0, sizeof(*gd->de_arr) * units); + memset(gd->flags, 0, sizeof(*gd->flags) * units); + memset(gd->real_devices, 0, sizeof(xl_disk_t) * units); + xlsegment_gendisk = gd; + add_gendisk(gd); + + /* Now register each disk in turn. 
*/ + for ( i = 0; i < xdi->count; i++ ) + { + disk = xdi->disks[i].device & XENDEV_IDX_MASK; + + if ( !IS_VIRTUAL_XENDEV(xdi->disks[i].device) || + (disk >= XLVIRT_DEVS_PER_MAJOR) ) + continue; + + ((xl_disk_t *)gd->real_devices)[disk].capacity = + xdi->disks[i].capacity; + register_disk(gd, + MKDEV(XLVIRT_MAJOR, disk<<XLVIRT_PARTN_SHIFT), + 1<<XLVIRT_PARTN_SHIFT, + &xlsegment_block_fops, + xdi->disks[i].capacity); + } + + printk(KERN_ALERT + "XenoLinux Virtual Segment Device Driver installed [device: %d]\n", + XLVIRT_MAJOR); + + return 0; +} + + +static void __exit xlseg_cleanup(void) +{ + if ( xlsegment_gendisk == NULL ) return; + + blk_cleanup_queue(BLK_DEFAULT_QUEUE(XLVIRT_MAJOR)); + + xlsegment_gendisk = NULL; + + read_ahead[XLVIRT_MAJOR] = 0; + + if ( blksize_size[XLVIRT_MAJOR] != NULL ) + { + kfree(blksize_size[XLVIRT_MAJOR]); + blksize_size[XLVIRT_MAJOR] = NULL; + } + + if ( hardsect_size[XLVIRT_MAJOR] != NULL ) + { + kfree(hardsect_size[XLVIRT_MAJOR]); + hardsect_size[XLVIRT_MAJOR] = NULL; + } + + if ( max_sectors[XLVIRT_MAJOR] != NULL ) + { + kfree(max_sectors[XLVIRT_MAJOR]); + max_sectors[XLVIRT_MAJOR] = NULL; + } + + if ( unregister_blkdev(XLVIRT_MAJOR, XLVIRT_MAJOR_NAME) != 0 ) + { + printk(KERN_ALERT + "XenoLinux Virtual Segment Device Driver" + " uninstalled w/ errs\n"); + } +} + + +#ifdef MODULE +module_init(xlseg_init); +module_exit(xlseg_cleanup); +#endif diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment_proc.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment_proc.c new file mode 100644 index 0000000000..464707362a --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_segment_proc.c @@ -0,0 +1,333 @@ +/* + * xl_segment_proc.c + * + * XenoLinux virtual disk proc interface . 
+ */ + +#include "xl_block.h" +#include <linux/proc_fs.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <asm/hypervisor-ifs/segment.h> + +static struct proc_dir_entry *vhd; + +static void *proc_vhd_next(struct seq_file *s, void *v, loff_t *pos) +{ + xen_segment_info_t *data; + + if ( pos != NULL ) + ++(*pos); + + data = v; + return data->count-- ? NULL : v; +} + +static void *proc_vhd_start(struct seq_file *s, loff_t *ppos) +{ + loff_t pos = *ppos; + xen_segment_info_t *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + xenolinux_control_msg(XEN_BLOCK_PROBE_SEG_ALL, (char *)data, sizeof(*data)); + data->count -= pos; + + if (data->count > 0) + return data; + + kfree(data); + return NULL; +} + +static int proc_vhd_show(struct seq_file *s, void *v) +{ + xen_segment_info_t *data = v; + + seq_printf (s, + "%x %x %10.10s %x\n", + data->segments[data->count - 1].domain, + data->segments[data->count - 1].seg_nr, + data->segments[data->count - 1].key, + data->segments[data->count - 1].mode); + + return 0; +} + +static void proc_vhd_stop(struct seq_file *s, void *v) +{ + kfree(v); +} + +static struct seq_operations proc_vhd_op = { + .start = proc_vhd_start, + .next = proc_vhd_next, + .show = proc_vhd_show, + .stop = proc_vhd_stop +}; + +static int proc_open_vhd(struct inode *inode, struct file *file) +{ + return seq_open(file, &proc_vhd_op); +} + + +#define isdelim(c) \ + (c==' '||c==','||c=='\n'||c=='\r'||c=='\t'||c==':'||c=='('||c==')' ? 1 : 0) + +char *get_string(char *string) /* a bit like strtok */ +{ + static char *temp; + int loop = 0; + + if (string != NULL) + temp = string; + else + string = temp; + + try_again: + + while (!isdelim(string[loop])) + { + if (string[loop] == '\0') + return NULL; + loop++; + } + + string[loop] = '\0'; + temp = (string + loop + 1); + + if (loop == 0) + { + string = temp; + goto try_again; + } + + return string; +} + + +#define isdigit(c) (c >= '0' && c <= '9' ? 
1 : 0) +unsigned long to_number(char *string) /* atoi */ +{ + unsigned long value = 0; + + if (string == NULL) return 0; + + while (!isdigit(*string) && *string != '\0') string++; + + while (isdigit(*string)) + { + value = value * 10 + (*string - '0'); + string++; + } + + return value; +} + +static int proc_write_vhd(struct file *file, const char *buffer, + size_t count, loff_t *offp) +{ + char *local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL); + char *string; + int loop; + xv_disk_t xvd; + int res; + + if (!local) + return -ENOMEM; + + memset (&xvd, 0, sizeof(xvd)); + + if (copy_from_user(local, buffer, count)) + { + res = -EFAULT; + goto out; + } + local[count] = '\0'; + + res = count; + string = get_string(local); /* domain specifier */ + if (string == NULL) + { + goto out; + } + if (*string != 'd' && *string != 'D') + { + printk (KERN_ALERT + "error: domain specifier missing [%s]. should be \"domain\".\n", + string); + goto out; + } + + string = get_string(NULL); /* domain number */ + if (string == NULL) + { + printk (KERN_ALERT "error: domain number missing\n"); + goto out; + } + xvd.domain = (int) to_number(string); + + string = get_string(NULL); + if (string && (strcmp(string, "RO") == 0 || strcmp(string, "ro") == 0)) + { + xvd.mode = XEN_DISK_READ_ONLY; + } + else if (string && (strcmp(string, "RW") == 0 || strcmp(string, "rw") == 0)) + { + xvd.mode = XEN_DISK_READ_WRITE; + } + else + { + printk (KERN_ALERT + "error: bad mode [%s]. should be \"rw\" or \"ro\".\n", + string); + goto out; + } + + string = get_string(NULL); /* look for Segment */ + if (string == NULL || (*string != 's' && *string != 'S')) + { + printk (KERN_ALERT + "error: segment specifier missing [%s]. 
should be \"segment\".\n", + string); + goto out; + } + + string = get_string(NULL); /* segment number */ + if (string == NULL) + { + printk (KERN_ALERT "error: segment number missing\n"); + goto out; + } + xvd.segment = (int) to_number(string); + + string = get_string(NULL); /* look for key */ + if (string == NULL || (*string != 'k' && *string != 'K')) + { + printk (KERN_ALERT + "error: key specifier missing [%s]. should be \"key\".\n", + string); + goto out; + } + string = get_string(NULL); + if (string == NULL || strlen(string) != XEN_SEGMENT_KEYSIZE) + { + printk (KERN_ALERT "error: key missing\n"); + goto out; + } + memcpy(xvd.key, string, XEN_SEGMENT_KEYSIZE); + + string = get_string(NULL); /* look for Extents */ + if (string == NULL || (*string != 'e' && *string != 'E')) + { + printk (KERN_ALERT + "error: extents specifier missing [%s]. should be \"extents\".\n", + string); + goto out; + } + + string = get_string(NULL); /* number of extents */ + if (string == NULL) + { + printk (KERN_ALERT "error: number of extents missing\n"); + goto out; + } + xvd.ext_count = (int) to_number(string); + + /* ignore parenthesis */ + + for (loop = 0; loop < xvd.ext_count; loop++) + { + string = get_string(NULL); /* look for Disk */ + if (string == NULL || (*string != 'd' && *string != 'D')) + { + printk (KERN_ALERT + "hmm, extent disk specifier missing [%s]. should be \"disk\".\n", + string); + goto out; + } + string = get_string(NULL); /* disk number */ + if (string == NULL) + { + printk (KERN_ALERT "error: disk number missing\n"); + goto out; + } + xvd.extents[loop].disk = xldev_to_physdev((int) to_number(string)); + + string = get_string(NULL); /* look for Offset */ + if (string == NULL || (*string != 'o' && *string != 'O')) + { + printk (KERN_ALERT + "error: disk offset missing [%s]. 
should be \"offset\".\n", + string); + goto out; + } + string = get_string(NULL); /* offset */ + if (string == NULL) + { + printk (KERN_ALERT "error: offset missing\n"); + goto out; + } + xvd.extents[loop].offset = to_number(string); + + string = get_string(NULL); /* look for Size */ + if (string == NULL || (*string != 's' && *string != 'S')) + { + printk (KERN_ALERT + "error: extent size missing [%s]. should be \"size\".\n", + string); + goto out; + } + string = get_string(NULL); /* size */ + if (string == NULL) + { + printk (KERN_ALERT "error: extent size missing\n"); + goto out; + } + xvd.extents[loop].size = to_number(string); + } + + xenolinux_control_msg(XEN_BLOCK_SEG_CREATE, (char *)&xvd, sizeof(xvd)); + + out: + kfree(local); + + return res; +} + +static struct file_operations proc_vhd_operations = { + open: proc_open_vhd, + read: seq_read, + llseek: seq_lseek, + release: seq_release, + write: proc_write_vhd +}; + +/******************************************************************/ + +int __init xlseg_proc_init(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + vhd = create_proc_entry("xeno/vhd", 0600, NULL); + if ( vhd == NULL ) + panic ("xlseg_init: unable to create vhd proc entry\n"); + + vhd->data = NULL; + vhd->proc_fops = &proc_vhd_operations; + vhd->owner = THIS_MODULE; + + printk(KERN_ALERT "XenoLinux Virtual Disk Device Monitor installed\n"); + return 0; +} + +static void __exit xlseg_proc_cleanup(void) +{ + printk(KERN_ALERT "XenoLinux Virtual Disk Device Monitor uninstalled\n"); +} + +#ifdef MODULE +module_init(xlseg_proc_init); +module_exit(xlseg_proc_cleanup); +#endif diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/Makefile new file mode 100644 index 0000000000..546180a3c2 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/Makefile @@ -0,0 +1,3 @@ +O_TARGET := con.o +obj-$(CONFIG_XEN_CONSOLE) := console.o +include 
$(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/console.c new file mode 100644 index 0000000000..e69cd2488e --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/console/console.c @@ -0,0 +1,229 @@ +/****************************************************************************** + * console.c + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/serial.h> +#include <linux/major.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/console.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/uaccess.h> +#include <asm/hypervisor.h> + +#define XENO_TTY_MINOR 123 + +/*** useful function for console debugging -- goes straight to Xen ****/ + +asmlinkage int xprintk(const char *fmt, ...) +{ + va_list args; + unsigned long flags; + int printed_len; + static char printk_buf[1024]; + + /* Emit the output into the temporary buffer */ + va_start(args, fmt); + printed_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); + va_end(args); + + // Useful Hack if things are going wrong very early in the day + (void)HYPERVISOR_console_write(printk_buf, sizeof(printk_buf)); +} + + + +/******************** Kernel console driver ********************************/ + +static void xen_console_write(struct console *co, const char *s, unsigned count) +{ +#define STRLEN 256 + static char str[STRLEN]; + static int pos = 0; + int len; + + /* We buffer output until we see a newline, or until the buffer is full. */ + while ( count != 0 ) + { + len = ((STRLEN - pos) > count) ? 
count : STRLEN - pos; + memcpy(str + pos, s, len); + pos += len; + s += len; + count -= len; + if ( (pos == STRLEN) || (str[pos-1] == '\n') ) + { + (void)HYPERVISOR_console_write(str, pos); + pos = 0; + } + } +} + +static kdev_t xen_console_device(struct console *c) +{ + /* + * This is the magic that binds our "struct console" to our + * "tty_struct", defined below. + */ + return MKDEV(TTY_MAJOR, XENO_TTY_MINOR); +} + +static struct console xen_console_info = { + name: "xencons", /* Used to be xen_console, but we're only + actually allowed 8 charcters including + the terminator... */ + write: xen_console_write, + device: xen_console_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xen_console_init(void) +{ + xprintk("xen_console_init\n"); + register_console(&xen_console_info); +} + + +/******************** Initial /dev/console *********************************/ + + +static struct tty_driver xeno_console_driver; +static int xeno_console_refcount; +static struct tty_struct *xeno_console_table[1]; +static struct termios *xeno_console_termios[1]; +static struct termios *xeno_console_termios_locked[1]; + +static int xeno_console_write_room(struct tty_struct *tty) +{ + return INT_MAX; +} + +static int xeno_console_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static inline int xeno_console_xmit(int ch) +{ + char _ch = ch; + xen_console_write(NULL, &_ch, 1); + return 1; +} + +static int xeno_console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + { + return -EINVAL; + } + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + { + __get_user(ch, buf + i); + } + else + { + ch = buf[i]; + } + xeno_console_xmit(ch); + } + return i; +} + +static void xeno_console_put_char(struct tty_struct *tty, u_char ch) +{ + xeno_console_xmit(ch); +} + +static int xeno_console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + 
MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + tty->driver_data = NULL; + + return 0; +} + +static void xeno_console_close(struct tty_struct *tty, struct file *filp) +{ + MOD_DEC_USE_COUNT; +} + +int __init xeno_con_init(void) +{ + memset(&xeno_console_driver, 0, sizeof(struct tty_driver)); + xeno_console_driver.magic = TTY_DRIVER_MAGIC; + xeno_console_driver.driver_name = "xeno_console"; + xeno_console_driver.name = "xencon"; + xeno_console_driver.major = TTY_MAJOR; + xeno_console_driver.minor_start = XENO_TTY_MINOR; + xeno_console_driver.num = 1; + xeno_console_driver.type = TTY_DRIVER_TYPE_SERIAL; + xeno_console_driver.subtype = SERIAL_TYPE_NORMAL; + xeno_console_driver.init_termios = tty_std_termios; + xeno_console_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + xeno_console_driver.refcount = &xeno_console_refcount; + xeno_console_driver.table = xeno_console_table; + xeno_console_driver.termios = xeno_console_termios; + xeno_console_driver.termios_locked = xeno_console_termios_locked; + /* Functions */ + xeno_console_driver.open = xeno_console_open; + xeno_console_driver.close = xeno_console_close; + xeno_console_driver.write = xeno_console_write; + xeno_console_driver.write_room = xeno_console_write_room; + xeno_console_driver.put_char = xeno_console_put_char; + xeno_console_driver.chars_in_buffer = xeno_console_chars_in_buffer; + + if ( tty_register_driver(&xeno_console_driver) ) + { + printk(KERN_ERR "Couldn't register Xeno console driver\n"); + } + else + { + printk("Xeno console successfully installed\n"); + } + + return 0; +} + +void __exit xeno_con_fini(void) +{ + int ret; + + ret = tty_unregister_driver(&xeno_console_driver); + if ( ret != 0 ) + { + printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); + } +} + +module_init(xeno_con_init); +module_exit(xeno_con_fini); + diff --git 
a/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/Makefile new file mode 100644 index 0000000000..9030801f14 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := dom0.o +obj-y := dom0_core.o vfr.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/dom0_core.c new file mode 100644 index 0000000000..20db18af74 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/dom0_core.c @@ -0,0 +1,161 @@ +/****************************************************************************** + * dom0_core.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2003, K A Fraser, B Dragovic + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/swapctl.h> +#include <linux/iobuf.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/proc_cmd.h> +#include <asm/hypervisor-ifs/dom0_ops.h> + +#include "../block/xl_block.h" + +struct proc_dir_entry *xeno_base; +static struct proc_dir_entry *privcmd_intf; + + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = 0; + + switch ( cmd ) + { + case IOCTL_PRIVCMD_HYPERCALL: + { + privcmd_hypercall_t hypercall; + + if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) + return -EFAULT; + + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " + "movl 4(%%eax),%%ebx ;" 
+ "movl 8(%%eax),%%ecx ;" + "movl 12(%%eax),%%edx ;" + "movl 16(%%eax),%%esi ;" + "movl 20(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + TRAP_INSTR "; " + "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); + } + break; + + case IOCTL_PRIVCMD_BLKMSG: + { + privcmd_blkmsg_t blkmsg; + char *kbuf; + int ret; + + if ( copy_from_user(&blkmsg, (void *)data, sizeof(blkmsg)) ) + return -EFAULT; + + if ( blkmsg.buf_size > PAGE_SIZE ) + return -EINVAL; + + if ( (kbuf = kmalloc(blkmsg.buf_size, GFP_KERNEL)) == NULL ) + return -ENOMEM; + + if ( copy_from_user(kbuf, blkmsg.buf, blkmsg.buf_size) ) { + kfree(kbuf); + return -EFAULT; + } + + ret = xenolinux_control_msg((int)blkmsg.op, kbuf, blkmsg.buf_size); + if ( ret != 0 ) { + kfree(kbuf); + return ret; + } + + if ( copy_to_user(blkmsg.buf, kbuf, blkmsg.buf_size) ) { + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + } + break; + + case IOCTL_PRIVCMD_LINDEV_TO_XENDEV: + { + ret = (int)xldev_to_physdev((kdev_t)data); + } + break; + + case IOCTL_PRIVCMD_XENDEV_TO_LINDEV: + { + ret = (int)physdev_to_xldev((unsigned short)data); + } + break; + + default: + { + ret = -EINVAL; + } + break; + } + + return ret; +} + + +static struct file_operations privcmd_file_ops = { + ioctl : privcmd_ioctl +}; + + +static int __init init_module(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + /* xeno proc root setup */ + xeno_base = proc_mkdir("xeno", &proc_root); + + /* xeno control interface */ + privcmd_intf = create_proc_entry("privcmd", 0400, xeno_base); + if ( privcmd_intf != NULL ) + { + privcmd_intf->owner = THIS_MODULE; + privcmd_intf->nlink = 1; + privcmd_intf->proc_fops = &privcmd_file_ops; + } + + return 0; +} + + +static void __exit cleanup_module(void) +{ + if ( privcmd_intf == NULL ) return; + remove_proc_entry("xeno", &proc_root); + privcmd_intf = NULL; +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git 
a/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/vfr.c new file mode 100644 index 0000000000..8c9f6530e7 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/dom0/vfr.c @@ -0,0 +1,323 @@ +/****************************************************************************** + * vfr.c + * + * Interface to the virtual firewall/router. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> + +#include <asm/hypervisor-ifs/network.h> + +static struct proc_dir_entry *proc_vfr; + +static unsigned char readbuf[1024]; + +extern struct proc_dir_entry *xeno_base; + +/* Helpers, implemented at the bottom. */ +u32 getipaddr(const char *buff, unsigned int len); +u16 antous(const char *buff, int len); +int anton(const char *buff, int len); + +static int vfr_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +/* The format for the vfr interface is as follows: + * + * COMMAND <field>=<val> [<field>=<val> [...]] + * + * where: + * + * COMMAND = { ACCEPT | COUNT } + * + * field=val pairs are as follows: + * + * field = { srcaddr | dstaddr } + * val is a dot seperated, numeric IP address. + * + * field = { srcport | dstport } + * val is a (16-bit) unsigned int + * + * field = { proto } + * val = { IP | TCP | UDP | ARP } + * + */ + +#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ + ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) + +static int vfr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. 
+ + len = count; + ts = te = 0; + + memset(&op, 0, sizeof(network_op_t)); + + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) + { + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; + } + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; + } + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; + } + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) + { + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) + while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + /* NB. Prefix matches must go first! 
*/ + if (strncmp(&buffer[fs], "src", fl) == 0) + { + op.u.net_rule.src_vif = VIF_ANY_INTERFACE; + } + else if (strncmp(&buffer[fs], "dst", fl) == 0) + { + op.u.net_rule.dst_vif = VIF_PHYSICAL_INTERFACE; + } + else if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcdom", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "srcidx", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstdom", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "dstidx", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) 
+ op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + } + } + + doneparsing: + ret = HYPERVISOR_network_op(&op); + return(len); + + bad: + return(len); + + +} + +static int __init init_module(void) +{ + *readbuf = '\0'; + proc_vfr = create_proc_entry ("vfr", 0600, xeno_base); + if ( proc_vfr != NULL ) + { + proc_vfr->owner = THIS_MODULE; + proc_vfr->nlink = 1; + proc_vfr->read_proc = vfr_read_proc; + proc_vfr->write_proc = vfr_write_proc; + printk("Successfully installed virtual firewall/router interface\n"); + } + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( proc_vfr == NULL ) return; + remove_proc_entry("vfr", xeno_base); + proc_vfr = NULL; +} + +module_init(init_module); +module_exit(cleanup_module); + +/* Helper functions start here: */ + +int anton(const char *buff, int len) +{ + int ret; + char c; + int sign = 1; + + ret = 0; + + if (len == 0) return 0; + if (*buff == '-') { sign = -1; buff++; len--; } + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + ret *= sign; + return ret; +} + +u16 antous(const char *buff, int len) +{ + u16 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u32 getipaddr(const char *buff, unsigned int len) +{ + char c; + u32 ret, val; + + ret = 0; val = 0; + + while ( len ) + { + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. 
+ ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; + } + ret = ret << 8; + ret += val; + + return (ret); +} + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/Makefile new file mode 100644 index 0000000000..b44a288a5b --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := net.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/network.c new file mode 100644 index 0000000000..3d12699799 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/drivers/network/network.c @@ -0,0 +1,582 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002-2003, K A Fraser + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +#define NET_IRQ _EVENT_NET + +#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) +#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) + +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void 
network_alloc_rx_buffers(struct net_device *dev); +static void network_free_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +/* + * Needed because network_close() is not properly implemented yet. So + * an open after a close needs to do much less than the initial open. + */ +static int opened_once_already = 0; + +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + atomic_t tx_entries; + unsigned int rx_resp_cons, tx_resp_cons, tx_full; + net_ring_t *net_ring; + net_idx_t *net_idx; + spinlock_t tx_lock; + unsigned int idx; /* Domain-specific index of this VIF. */ + + unsigned int rx_bufs_to_notify; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[TX_RING_SIZE]; + struct sk_buff *rx_skbs[RX_RING_SIZE]; +}; + +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. 
*/ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + _id; }) + + +static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d," + " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n", + np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, + test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "rx_resp_cons = %d," + " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + int i, error = 0; + + if ( opened_once_already ) + { + memset(&np->stats, 0, sizeof(np->stats)); + netif_start_queue(dev); + return 0; + } + + np->rx_bufs_to_notify = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + atomic_set(&np->tx_entries, 0); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
*/ + for ( i = 0; i < TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i < RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + network_alloc_rx_buffers(dev); + + error = request_irq(NET_IRQ, network_interrupt, + SA_SAMPLE_RANDOM, "network", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate network interrupt\n", + dev->name); + network_free_rx_buffers(dev); + goto fail; + } + + error = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ, + "debug", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Non-fatal error -- no debug interrupt\n", + dev->name); + } + + printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + opened_once_already = 1; + + return 0; + + fail: + kfree(np); + return error; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned int prod; + tx_entry_t *tx_ring = np->net_ring->tx_ring; + + do { + prod = np->net_idx->tx_resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) ) + { + skb = np->tx_skbs[tx_ring[i].resp.id]; + ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id); + dev_kfree_skb_any(skb); + atomic_dec(&np->tx_entries); + } + + np->tx_resp_cons = prod; + + /* Set a new event, then check for race with update of tx_cons. 
*/ + np->net_idx->tx_event = + TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1); + smp_mb(); + } + while ( prod != np->net_idx->tx_resp_prod ); + + if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) + { + np->tx_full = 0; + netif_wake_queue(dev); + } +} + + +static inline pte_t *get_ppte(void *addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned int i, id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES); + + if ( (i = np->net_idx->rx_req_prod) == end ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( skb == NULL ) break; + skb->dev = dev; + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[i].req.id = (unsigned short)id; + np->net_ring->rx_ring[i].req.addr = + virt_to_machine(get_ppte(skb->head)); + + np->rx_bufs_to_notify++; + } + while ( (i = RX_RING_INC(i)) != end ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + np->net_idx->rx_req_prod = i; + np->net_idx->rx_event = RX_RING_INC(np->rx_resp_cons); + + /* Batch Xen notifications. 
*/ + if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) ) + { + HYPERVISOR_net_update(); + np->rx_bufs_to_notify = 0; + } +} + + +static void network_free_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + for ( i = np->rx_resp_cons; + i != np->net_idx->rx_req_prod; + i = RX_RING_INC(i) ) + { + skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id]; + dev_kfree_skb_any(skb); + } +} + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i, id; + struct net_private *np = (struct net_private *)dev->priv; + + if ( np->tx_full ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( new_skb == NULL ) return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + i = np->net_idx->tx_req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + np->net_ring->tx_ring[i].req.id = (unsigned short)id; + np->net_ring->tx_ring[i].req.addr = + phys_to_machine(virt_to_phys(skb->data)); + np->net_ring->tx_ring[i].req.size = skb->len; + np->net_idx->tx_req_prod = TX_RING_INC(i); + atomic_inc(&np->tx_entries); + + network_tx_buf_gc(dev); + + if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. 
*/ + smp_mb(); + if ( np->net_idx->tx_resp_prod == i ) + HYPERVISOR_net_update(); + + return 0; +} + + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned int i; + unsigned long flags; + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + rx_resp_entry_t *rx; + + spin_lock_irqsave(&np->tx_lock, flags); + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + again: + for ( i = np->rx_resp_cons; + i != np->net_idx->rx_resp_prod; + i = RX_RING_INC(i) ) + { + rx = &np->net_ring->rx_ring[i].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( rx->status != RING_STATUS_OK ) + { + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb_any(skb); + continue; + } + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; + + if ( rx->offset < 16 ) + { + printk(KERN_ALERT "need pkt offset >= 16 (got %d)\n", rx->offset); + dev_kfree_skb_any(skb); + continue; + } + + skb_reserve(skb, rx->offset - 16); + + skb_put(skb, rx->size); + skb->protocol = eth_type_trans(skb, dev); + + np->stats.rx_packets++; + + np->stats.rx_bytes += rx->size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. 
*/ + smp_mb(); + if ( np->net_idx->rx_resp_prod != i ) goto again; +} + + +int network_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + /* + * XXXX This cannot be done safely until be have a proper interface + * for setting up and tearing down virtual interfaces on the fly. + * Currently the receive buffers are locked down by Xen and we have + * no sensible way of retrieving them. + */ +#if 0 + free_irq(NET_IRQ, dev); + + network_free_rx_buffers(dev); + kfree(np->net_ring->rx_ring); + kfree(np->net_ring->tx_ring); + + MOD_DEC_USE_COUNT; +#endif + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +/* + * This notifier is installed for domain 0 only. + * All other domains have VFR rules installed on their behalf by domain 0 + * when they are created. For bootstrap, Xen creates wildcard rules for + * domain 0 -- this notifier is used to detect when we find our proper + * IP address, so we can poke down proper rules and remove the wildcards. 
+ */ +static int inetdev_notify(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct list_head *ent; + struct net_private *np; + int idx = -1; + network_op_t op; + + list_for_each ( ent, &dev_list ) + { + np = list_entry(dev_list.next, struct net_private, list); + if ( np->dev == dev ) + idx = np->idx; + } + + if ( idx == -1 ) + goto out; + + memset(&op, 0, sizeof(op)); + op.u.net_rule.proto = NETWORK_PROTO_ANY; + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + + if ( event == NETDEV_UP ) + op.cmd = NETWORK_OP_ADDRULE; + else if ( event == NETDEV_DOWN ) + op.cmd = NETWORK_OP_DELETERULE; + else + goto out; + + op.u.net_rule.src_vif = idx; + op.u.net_rule.dst_vif = VIF_PHYSICAL_INTERFACE; + op.u.net_rule.src_addr = ntohl(ifa->ifa_address); + op.u.net_rule.src_addr_mask = ~0UL; + op.u.net_rule.dst_addr = 0; + op.u.net_rule.dst_addr_mask = 0; + (void)HYPERVISOR_network_op(&op); + + op.u.net_rule.src_vif = VIF_ANY_INTERFACE; + op.u.net_rule.dst_vif = idx; + op.u.net_rule.src_addr = 0; + op.u.net_rule.src_addr_mask = 0; + op.u.net_rule.dst_addr = ntohl(ifa->ifa_address); + op.u.net_rule.dst_addr_mask = ~0UL; + (void)HYPERVISOR_network_op(&op); + + out: + return NOTIFY_DONE; +} + +static struct notifier_block notifier_inetdev = { + .notifier_call = inetdev_notify, + .next = NULL, + .priority = 0 +}; + + +int __init init_module(void) +{ + int i, fixmap_idx=-1, err; + struct net_device *dev; + struct net_private *np; + + INIT_LIST_HEAD(&dev_list); + + /* + * Domain 0 must poke its own network rules as it discovers its IP + * addresses. All other domains have a privileged "parent" to do this for + * them at start of day. 
+ */ + if ( start_info.dom_id == 0 ) + (void)register_inetaddr_notifier(¬ifier_inetdev); + + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) + { + if ( start_info.net_rings[i] == 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]); + + np = dev->priv; + np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[i]; + np->idx = i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memcpy(dev->dev_addr, start_info.net_vmac[i], ETH_ALEN); + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } + + if ( start_info.dom_id == 0 ) + (void)unregister_inetaddr_notifier(¬ifier_inetdev); +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/kernel/Makefile new file mode 100644 index 0000000000..a43a615e2f --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -traditional -c $< -o $*.o + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o + +export-objs := i386_ksyms.o + +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ + ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o 
\ + i386_ksyms.o i387.o hypervisor.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.22-sparse/arch/xeno/kernel/entry.S new file mode 100644 index 0000000000..9c909e3f7b --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/entry.S @@ -0,0 +1,779 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. + * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. + * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ + +#include <linux/config.h> +#include <linux/sys.h> +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/smp.h> + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +TF_MASK = 0x00000100 +IF_MASK = 0x00000200 +DF_MASK = 0x00000400 +NT_MASK = 0x00004000 + +/* + * these are offsets into the task-struct. 
+ */ +state = 0 +flags = 4 +sigpending = 8 +addr_limit = 12 +exec_domain = 16 +need_resched = 20 +tsk_ptrace = 24 +processor = 52 + +ENOSYS = 38 + + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + +ENTRY(lcall7) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + +ENTRY(lcall27) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. 
+ movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + + +ENTRY(ret_from_fork) + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call + +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. + */ + +ENTRY(system_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys + cmpl $(NR_syscalls),%eax + jae badsys + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +ENTRY(ret_from_sys_call) + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # make tests atomic +ret_syscall_tests: + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + je safesti # ensure need_resched updates are seen +signal_return: + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + movl %esp,%eax + xorl %edx,%edx + call SYMBOL_NAME(do_signal) + jmp ret_from_sys_call + + ALIGN +restore_all: + RESTORE_ALL + + ALIGN +tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracesys_exit: + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp 
ret_from_sys_call + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) +ret_from_exception: + movb CS(%esp),%al + testl $2,%eax + jne ret_from_sys_call + jmp restore_all + + ALIGN +reschedule: + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call *%edi + addl $8,%esp + jmp ret_from_exception + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. 
+ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + GET_CURRENT(%ebx) + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_syscall_tests +safesti:btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + cmpl %eax,(%esi) + jne 14f # process more events if necessary... + RESTORE_ALL +14: btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. 
+critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00 # cmpl %eax,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. 
+ENTRY(failsafe_callback) +1: pop %ds +2: pop %es +3: pop %fs +4: pop %gs +5: iret +.section .fixup,"ax"; \ +6: movl $0,(%esp); \ + jmp 1b; \ +7: movl $0,(%esp); \ + jmp 2b; \ +8: movl $0,(%esp); \ + jmp 3b; \ +9: movl $0,(%esp); \ + jmp 4b; \ +10: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,6b; \ + .long 2b,7b; \ + .long 3b,8b; \ + .long 4b,9b; \ + .long 5b,10b; \ +.previous + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp error_code + +ENTRY(device_not_available) + pushl $-1 # mark this as an int + SAVE_ALL + GET_CURRENT(%ebx) + call SYMBOL_NAME(math_state_restore) + jmp ret_from_exception + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(double_fault) + pushl $ SYMBOL_NAME(do_double_fault) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +# This handler is special, because it gets an extra value on its stack, +# which is the linear faulting address. 
+ENTRY(page_fault) + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the faulting address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %edi # push the faulting address + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call SYMBOL_NAME(do_page_fault) + addl $12,%esp + jmp ret_from_exception + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +.data +ENTRY(sys_call_table) + .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ + .long SYMBOL_NAME(sys_exit) + .long SYMBOL_NAME(sys_fork) + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ + .long SYMBOL_NAME(sys_close) + .long SYMBOL_NAME(sys_waitpid) + .long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) + .long SYMBOL_NAME(sys_unlink) /* 10 */ + .long SYMBOL_NAME(sys_execve) + .long SYMBOL_NAME(sys_chdir) + .long SYMBOL_NAME(sys_time) + .long SYMBOL_NAME(sys_mknod) + .long SYMBOL_NAME(sys_chmod) /* 15 */ + .long SYMBOL_NAME(sys_lchown16) + .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ + .long SYMBOL_NAME(sys_stat) + .long SYMBOL_NAME(sys_lseek) + .long SYMBOL_NAME(sys_getpid) /* 20 */ + .long SYMBOL_NAME(sys_mount) + .long SYMBOL_NAME(sys_oldumount) + .long SYMBOL_NAME(sys_setuid16) + .long SYMBOL_NAME(sys_getuid16) + .long SYMBOL_NAME(sys_stime) /* 25 */ + .long SYMBOL_NAME(sys_ptrace) + .long SYMBOL_NAME(sys_alarm) + .long SYMBOL_NAME(sys_fstat) + .long SYMBOL_NAME(sys_pause) + .long SYMBOL_NAME(sys_utime) /* 30 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old 
stty syscall holder */ + .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ + .long SYMBOL_NAME(sys_access) + .long SYMBOL_NAME(sys_nice) + .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ + .long SYMBOL_NAME(sys_sync) + .long SYMBOL_NAME(sys_kill) + .long SYMBOL_NAME(sys_rename) + .long SYMBOL_NAME(sys_mkdir) + .long SYMBOL_NAME(sys_rmdir) /* 40 */ + .long SYMBOL_NAME(sys_dup) + .long SYMBOL_NAME(sys_pipe) + .long SYMBOL_NAME(sys_times) + .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ + .long SYMBOL_NAME(sys_brk) /* 45 */ + .long SYMBOL_NAME(sys_setgid16) + .long SYMBOL_NAME(sys_getgid16) + .long SYMBOL_NAME(sys_signal) + .long SYMBOL_NAME(sys_geteuid16) + .long SYMBOL_NAME(sys_getegid16) /* 50 */ + .long SYMBOL_NAME(sys_acct) + .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ + .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ + .long SYMBOL_NAME(sys_ioctl) + .long SYMBOL_NAME(sys_fcntl) /* 55 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ + .long SYMBOL_NAME(sys_setpgid) + .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ + .long SYMBOL_NAME(sys_olduname) + .long SYMBOL_NAME(sys_umask) /* 60 */ + .long SYMBOL_NAME(sys_chroot) + .long SYMBOL_NAME(sys_ustat) + .long SYMBOL_NAME(sys_dup2) + .long SYMBOL_NAME(sys_getppid) + .long SYMBOL_NAME(sys_getpgrp) /* 65 */ + .long SYMBOL_NAME(sys_setsid) + .long SYMBOL_NAME(sys_sigaction) + .long SYMBOL_NAME(sys_sgetmask) + .long SYMBOL_NAME(sys_ssetmask) + .long SYMBOL_NAME(sys_setreuid16) /* 70 */ + .long SYMBOL_NAME(sys_setregid16) + .long SYMBOL_NAME(sys_sigsuspend) + .long SYMBOL_NAME(sys_sigpending) + .long SYMBOL_NAME(sys_sethostname) + .long SYMBOL_NAME(sys_setrlimit) /* 75 */ + .long SYMBOL_NAME(sys_old_getrlimit) + .long SYMBOL_NAME(sys_getrusage) + .long SYMBOL_NAME(sys_gettimeofday) + .long SYMBOL_NAME(sys_settimeofday) + .long SYMBOL_NAME(sys_getgroups16) /* 80 */ + .long 
SYMBOL_NAME(sys_setgroups16) + .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_symlink) + .long SYMBOL_NAME(sys_lstat) + .long SYMBOL_NAME(sys_readlink) /* 85 */ + .long SYMBOL_NAME(sys_uselib) + .long SYMBOL_NAME(sys_swapon) + .long SYMBOL_NAME(sys_reboot) + .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_munmap) + .long SYMBOL_NAME(sys_truncate) + .long SYMBOL_NAME(sys_ftruncate) + .long SYMBOL_NAME(sys_fchmod) + .long SYMBOL_NAME(sys_fchown16) /* 95 */ + .long SYMBOL_NAME(sys_getpriority) + .long SYMBOL_NAME(sys_setpriority) + .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ + .long SYMBOL_NAME(sys_statfs) + .long SYMBOL_NAME(sys_fstatfs) /* 100 */ + .long SYMBOL_NAME(sys_ioperm) + .long SYMBOL_NAME(sys_socketcall) + .long SYMBOL_NAME(sys_syslog) + .long SYMBOL_NAME(sys_setitimer) + .long SYMBOL_NAME(sys_getitimer) /* 105 */ + .long SYMBOL_NAME(sys_newstat) + .long SYMBOL_NAME(sys_newlstat) + .long SYMBOL_NAME(sys_newfstat) + .long SYMBOL_NAME(sys_uname) + .long SYMBOL_NAME(sys_iopl) /* 110 */ + .long SYMBOL_NAME(sys_vhangup) + .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_wait4) + .long SYMBOL_NAME(sys_swapoff) /* 115 */ + .long SYMBOL_NAME(sys_sysinfo) + .long SYMBOL_NAME(sys_ipc) + .long SYMBOL_NAME(sys_fsync) + .long SYMBOL_NAME(sys_sigreturn) + .long SYMBOL_NAME(sys_clone) /* 120 */ + .long SYMBOL_NAME(sys_setdomainname) + .long SYMBOL_NAME(sys_newuname) + .long SYMBOL_NAME(sys_modify_ldt) + .long SYMBOL_NAME(sys_adjtimex) + .long SYMBOL_NAME(sys_mprotect) /* 125 */ + .long SYMBOL_NAME(sys_sigprocmask) + .long SYMBOL_NAME(sys_create_module) + .long SYMBOL_NAME(sys_init_module) + .long SYMBOL_NAME(sys_delete_module) + .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ + .long SYMBOL_NAME(sys_quotactl) + .long SYMBOL_NAME(sys_getpgid) + .long SYMBOL_NAME(sys_fchdir) + .long SYMBOL_NAME(sys_bdflush) + 
.long SYMBOL_NAME(sys_sysfs) /* 135 */ + .long SYMBOL_NAME(sys_personality) + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ + .long SYMBOL_NAME(sys_setfsuid16) + .long SYMBOL_NAME(sys_setfsgid16) + .long SYMBOL_NAME(sys_llseek) /* 140 */ + .long SYMBOL_NAME(sys_getdents) + .long SYMBOL_NAME(sys_select) + .long SYMBOL_NAME(sys_flock) + .long SYMBOL_NAME(sys_msync) + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .long SYMBOL_NAME(sys_getsid) + .long SYMBOL_NAME(sys_fdatasync) + .long SYMBOL_NAME(sys_sysctl) + .long SYMBOL_NAME(sys_mlock) /* 150 */ + .long SYMBOL_NAME(sys_munlock) + .long SYMBOL_NAME(sys_mlockall) + .long SYMBOL_NAME(sys_munlockall) + .long SYMBOL_NAME(sys_sched_setparam) + .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ + .long SYMBOL_NAME(sys_sched_setscheduler) + .long SYMBOL_NAME(sys_sched_getscheduler) + .long SYMBOL_NAME(sys_sched_yield) + .long SYMBOL_NAME(sys_sched_get_priority_max) + .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ + .long SYMBOL_NAME(sys_sched_rr_get_interval) + .long SYMBOL_NAME(sys_nanosleep) + .long SYMBOL_NAME(sys_mremap) + .long SYMBOL_NAME(sys_setresuid16) + .long SYMBOL_NAME(sys_getresuid16) /* 165 */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_query_module) + .long SYMBOL_NAME(sys_poll) + .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_setresgid16) /* 170 */ + .long SYMBOL_NAME(sys_getresgid16) + .long SYMBOL_NAME(sys_prctl) + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .long SYMBOL_NAME(sys_pread) /* 180 */ + .long SYMBOL_NAME(sys_pwrite) + .long SYMBOL_NAME(sys_chown16) + .long SYMBOL_NAME(sys_getcwd) + .long SYMBOL_NAME(sys_capget) + .long SYMBOL_NAME(sys_capset) /* 185 */ + .long 
SYMBOL_NAME(sys_sigaltstack) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ + .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ + .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_getrlimit) + .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + .long SYMBOL_NAME(sys_stat64) /* 195 */ + .long SYMBOL_NAME(sys_lstat64) + .long SYMBOL_NAME(sys_fstat64) + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_getgid) /* 200 */ + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) + .long SYMBOL_NAME(sys_setreuid) + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_getgroups) /* 205 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_fchown) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) + .long SYMBOL_NAME(sys_setresgid) /* 210 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_setfsuid) /* 215 */ + .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_pivot_root) + .long SYMBOL_NAME(sys_mincore) + .long SYMBOL_NAME(sys_madvise) + .long SYMBOL_NAME(sys_getdents64) /* 220 */ + .long SYMBOL_NAME(sys_fcntl64) + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ + .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ + .long SYMBOL_NAME(sys_gettid) + .long SYMBOL_NAME(sys_readahead) /* 225 */ + .long SYMBOL_NAME(sys_setxattr) + .long SYMBOL_NAME(sys_lsetxattr) + .long SYMBOL_NAME(sys_fsetxattr) + .long SYMBOL_NAME(sys_getxattr) + .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ + .long SYMBOL_NAME(sys_fgetxattr) + .long SYMBOL_NAME(sys_listxattr) + .long SYMBOL_NAME(sys_llistxattr) + .long SYMBOL_NAME(sys_flistxattr) + .long SYMBOL_NAME(sys_removexattr) /* 235 */ + .long SYMBOL_NAME(sys_lremovexattr) + .long SYMBOL_NAME(sys_fremovexattr) + .long SYMBOL_NAME(sys_tkill) + .long 
SYMBOL_NAME(sys_sendfile64) + .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ + .long SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ + + .rept NR_syscalls-(.-sys_call_table)/4 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.22-sparse/arch/xeno/kernel/head.S new file mode 100644 index 0000000000..a89fd8eda4 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/head.S @@ -0,0 +1,70 @@ + +.text +#include <linux/config.h> +#include <linux/threads.h> +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/desc.h> + +/* Offsets in start_info structure */ +#define SHARED_INFO 4 +#define MOD_START 12 +#define MOD_LEN 16 + +startup_32: + cld + + lss stack_start,%esp + + /* Copy initrd somewhere safe before it's clobbered by BSS. 
*/ + mov MOD_LEN(%esi),%ecx + shr $2,%ecx + jz 2f /* bail from copy loop if no initrd */ + mov $SYMBOL_NAME(_end),%edi + add MOD_LEN(%esi),%edi + mov MOD_START(%esi),%eax + add MOD_LEN(%esi),%eax +1: sub $4,%eax + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b + mov %edi,MOD_START(%esi) + + /* Clear BSS first so that there are no surprises... */ +2: xorl %eax,%eax + movl $SYMBOL_NAME(__bss_start),%edi + movl $SYMBOL_NAME(_end),%ecx + subl %edi,%ecx + rep stosb + + /* Copy the necessary stuff from start_info structure. */ + /* We need to copy shared_info early, so that sti/cli work */ + mov SHARED_INFO(%esi),%eax + mov %eax,SYMBOL_NAME(HYPERVISOR_shared_info) + mov $SYMBOL_NAME(start_info_union),%edi + mov $128,%ecx + rep movsl + + jmp SYMBOL_NAME(start_kernel) + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS + +.org 0x1000 +ENTRY(empty_zero_page) + +.org 0x2000 +ENTRY(default_ldt) + +.org 0x3000 +ENTRY(cpu0_pte_quicklist) + +.org 0x3400 +ENTRY(cpu0_pgd_quicklist) + +.org 0x3800 +ENTRY(stext) +ENTRY(_stext) diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/hypervisor.c new file mode 100644 index 0000000000..3f414e9876 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/hypervisor.c @@ -0,0 +1,128 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include <linux/config.h> +#include <asm/atomic.h> +#include <linux/irq.h> +#include <asm/hypervisor.h> +#include <asm/system.h> +#include <asm/ptrace.h> + +multicall_entry_t multicall_list[8]; +int nr_multicall_ents = 0; + +static unsigned long event_mask = 0; + +void frobb(void) {} + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long events, flags; + shared_info_t *shared = HYPERVISOR_shared_info; + + do { + /* Specialised local_irq_save(). 
*/ + flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, + &shared->events_mask); + barrier(); + + events = xchg(&shared->events, 0); + events &= event_mask; + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 2f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "2: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (events), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* Specialised local_irq_restore(). */ + if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); + barrier(); + } + while ( shared->events ); +} + + + +/* + * Define interface to generic handling in irq.c + */ + +static void shutdown_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); + clear_bit(irq, &HYPERVISOR_shared_info->events_mask); +} + +static void enable_hypervisor_event(unsigned int irq) +{ + set_bit(irq, &event_mask); + set_bit(irq, &HYPERVISOR_shared_info->events_mask); + if ( test_bit(EVENTS_MASTER_ENABLE_BIT, + &HYPERVISOR_shared_info->events_mask) ) + do_hypervisor_callback(NULL); +} + +static void disable_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); + clear_bit(irq, &HYPERVISOR_shared_info->events_mask); +} + +static void ack_hypervisor_event(unsigned int irq) +{ + if ( !(event_mask & (1<<irq)) ) + { + printk("Unexpected hypervisor event %d\n", irq); + atomic_inc(&irq_err_count); + } + set_bit(irq, &HYPERVISOR_shared_info->events_mask); +} + +static unsigned int startup_hypervisor_event(unsigned int irq) +{ + enable_hypervisor_event(irq); + return 0; +} + +static void end_hypervisor_event(unsigned int irq) +{ +} + +static struct hw_interrupt_type hypervisor_irq_type = { + "Hypervisor-event", + startup_hypervisor_event, + shutdown_hypervisor_event, + enable_hypervisor_event, + disable_hypervisor_event, + ack_hypervisor_event, + 
end_hypervisor_event, + NULL +}; + +void __init init_IRQ(void) +{ + int i; + + for ( i = 0; i < NR_IRQS; i++ ) + { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = 0; + irq_desc[i].depth = 1; + irq_desc[i].handler = &hypervisor_irq_type; + } +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/i386_ksyms.c new file mode 100644 index 0000000000..b62c171b3c --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/i386_ksyms.c @@ -0,0 +1,154 @@ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/user.h> +#include <linux/elfcore.h> +#include <linux/mca.h> +#include <linux/sched.h> +#include <linux/in6.h> +#include <linux/interrupt.h> +#include <linux/smp_lock.h> +#include <linux/pm.h> +//XXX ??? #include <linux/pci.h> +#include <linux/apm_bios.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/tty.h> + +#include <asm/semaphore.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> +#include <asm/io.h> +#include <asm/hardirq.h> +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/mmx.h> +#include <asm/desc.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> + +extern void dump_thread(struct pt_regs *, struct user *); +extern spinlock_t rtc_lock; + +#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) +extern void machine_real_restart(unsigned char *, int); +EXPORT_SYMBOL(machine_real_restart); +extern void default_idle(void); +EXPORT_SYMBOL(default_idle); +#endif + +#ifdef CONFIG_SMP +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; +EXPORT_SYMBOL(drive_info); +#endif + +// XXX extern unsigned long 
get_cmos_time(void); + +/* platform dependent support */ +EXPORT_SYMBOL(boot_cpu_data); +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(dump_extended_fpu); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(disable_irq_nosync); +EXPORT_SYMBOL(probe_irq_mask); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(pm_idle); +EXPORT_SYMBOL(pm_power_off); +EXPORT_SYMBOL(apm_info); +//EXPORT_SYMBOL(gdt); +EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_DEBUG_IOVIRT +EXPORT_SYMBOL(__io_virt_debug); +#endif + +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); +EXPORT_SYMBOL_NOVERS(__down_failed_trylock); +EXPORT_SYMBOL_NOVERS(__up_wakeup); +/* Networking helper routines. */ +EXPORT_SYMBOL(csum_partial_copy_generic); +/* Delay loops */ +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); + +EXPORT_SYMBOL_NOVERS(__get_user_1); +EXPORT_SYMBOL_NOVERS(__get_user_2); +EXPORT_SYMBOL_NOVERS(__get_user_4); + +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); + +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__generic_copy_from_user); +EXPORT_SYMBOL(__generic_copy_to_user); +EXPORT_SYMBOL(strnlen_user); + +#ifdef CONFIG_X86_USE_3DNOW +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(cpu_data); +EXPORT_SYMBOL(kernel_flag_cacheline); +EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL_NOVERS(__write_lock_failed); +EXPORT_SYMBOL_NOVERS(__read_lock_failed); + +/* Global SMP irq stuff */ +EXPORT_SYMBOL(synchronize_irq); +EXPORT_SYMBOL(global_irq_holder); +EXPORT_SYMBOL(__global_cli); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_save_flags); +EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_call_function); + +/* TLB 
flushing */ +EXPORT_SYMBOL(flush_tlb_page); +#endif + +#ifdef CONFIG_X86_IO_APIC +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +#endif + +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(get_wchan); + +EXPORT_SYMBOL(rtc_lock); + +#undef memcpy +#undef memset +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +EXPORT_SYMBOL_NOVERS(memcpy); +EXPORT_SYMBOL_NOVERS(memset); + +#ifdef CONFIG_HAVE_DEC_LOCK +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif + +#ifdef CONFIG_MULTIQUAD +EXPORT_SYMBOL(xquad_portio); +#endif diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/ioport.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/ioport.c new file mode 100644 index 0000000000..ed6dbbc3c5 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/ioport.c @@ -0,0 +1,48 @@ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/stddef.h> +#include <asm/hypervisor-ifs/dom0_ops.h> + + +asmlinkage int sys_iopl(unsigned int new_io_pl) +{ + unsigned int old_io_pl = current->thread.io_pl; + dom0_op_t op; + + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return -EPERM; + + if ( new_io_pl > 3 ) + return -EINVAL; + + /* Need "raw I/O" privileges for direct port access. */ + if ( (new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO) ) + return -EPERM; + + /* Maintain OS privileges even if user attempts to relinquish them. */ + if ( (new_io_pl == 0) && (start_info.flags & SIF_PRIVILEGED) ) + new_io_pl = 1; + + /* Change our version of the privilege levels. */ + current->thread.io_pl = new_io_pl; + + /* Force the change at ring 0. */ + op.cmd = DOM0_IOPL; + op.u.iopl.domain = start_info.dom_id; + op.u.iopl.iopl = new_io_pl; + HYPERVISOR_dom0_op(&op); + + return 0; +} + + +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + printk(KERN_INFO "ioperm not fully supported - %s\n", + turn_on ? 
"set iopl to 3" : "ignore resource release"); + return turn_on ? sys_iopl(3) : 0; +} + + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/irq.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/irq.c new file mode 100644 index 0000000000..4f449691f0 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/irq.c @@ -0,0 +1,1136 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include <linux/config.h> +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/irq.h> +#include <linux/proc_fs.h> + +#include <asm/atomic.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#include <asm/delay.h> +#include <asm/desc.h> +#include <asm/irq.h> + + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. 
+ * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); + +/* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +int get_irq_list(char *buf) +{ + int i, j; + struct irqaction * action; + char *p = buf; + + p += sprintf(p, " "); + for (j=0; j<smp_num_cpus; j++) + p += sprintf(p, "CPU%d ",j); + *p++ = '\n'; + + for (i = 0 ; i < NR_IRQS ; i++) { + action = irq_desc[i].action; + if (!action) + continue; + p += sprintf(p, "%3d: ",i); +#ifndef CONFIG_SMP + p += sprintf(p, "%10u ", kstat_irqs(i)); +#else + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); +#endif + p += sprintf(p, " %14s", 
irq_desc[i].handler->typename); + p += sprintf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + p += sprintf(p, ", %s", action->name); + *p++ = '\n'; + } + p += sprintf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + nmi_count(cpu_logical_map(j))); + p += sprintf(p, "\n"); +#if CONFIG_X86_LOCAL_APIC + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + p += sprintf(p, "\n"); +#endif + p += sprintf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG + p += sprintf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif +#endif + return p - buf; +} + + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = NO_PROC_ID; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +extern void show_stack(unsigned long* esp); + +static void show(char * str) +{ + int i; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [",irqs_running()); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_irq_count(i)); + printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_bh_count(i)); + + printk(" ]\nStack dumps:"); + for(i = 0; i < smp_num_cpus; i++) { + unsigned long esp; + if (i == cpu) + continue; + printk("\nCPU %d:",i); + esp = init_tss[i].esp0; + if (!esp) { + /* tss->esp0 is set to NULL in cpu_init(), + * it's initialized when the cpu returns to user + * space. 
-- manfreds + */ + printk(" <unknown> "); + continue; + } + esp &= ~(THREAD_SIZE-1); + esp += sizeof(struct task_struct); + show_stack((void*)esp); + } + printk("\nCPU %d:",cpu); + show_stack(NULL); + printk("\n"); +} + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. Set this define to 0 if you + * want to check whether your system freezes. I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. 
Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +void __global_cli(void) +{ + panic("__global_cli"); +} + +void __global_sti(void) +{ + panic("__global_sti"); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + panic("__global_save_flags"); +} + +void __global_restore_flags(unsigned long flags) +{ + panic("__global_restore_flags"); +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. 
+ */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. 
+ */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; +#ifdef CONFIG_DEBUG_STACKOVERFLOW + long esp; + + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + + printk("do_IRQ: stack overflow: %ld\n", + esp - sizeof(struct task_struct)); + __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); + show_stack((void *)esp); + } +#endif + + kstat.irqs[cpu][irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. 
+ */ + for (;;) { + spin_unlock(&desc->lock); + handle_IRQ_event(irq, regs, action); + spin_lock(&desc->lock); + + if (!(desc->status & IRQ_PENDING)) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + + if (softirq_pending(cpu)) + do_softirq(); + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. 
+ * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. 
+ */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static DECLARE_MUTEX(probe_sem); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long delay; + + down(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. 
*/ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger + */ + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ synchronize_irq(); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +/* + * Return a mask of triggered interrupts (this + * can handle only legacy ISA interrupts). + */ + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. 
+ */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (i < 16 && !(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + return mask & val; +} + +/* + * Return the one interrupt that triggered (this can + * handle any interrupt source). + */ + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldnt happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. 
+ */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. 
+ */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); + return 0; +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; + +#define HEX_DIGITS 8 + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +#if CONFIG_SMP + +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static unsigned long irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = ~0UL }; +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", irq_affinity[(long)data]); +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int irq = (long) data, full_count = count, err; + unsigned long new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = parse_hex_value(buffer, count, &new_value); + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!(new_value & cpu_online_map)) + return -EINVAL; + + irq_affinity[irq] = new_value; + irq_desc[irq].handler->set_affinity(irq, new_value); + + return full_count; +} + +#endif + +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long *mask = (unsigned long *) data; + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", *mask); +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned long *mask = (unsigned long *) data, full_count = count, err; + unsigned long new_value; + + err = parse_hex_value(buffer, count, &new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || + irq_dir[irq]) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + +#if CONFIG_SMP + { + struct proc_dir_entry *entry; + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 
0600, irq_dir[irq]); + + if (entry) { + entry->nlink = 1; + entry->data = (void *)(long)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = irq_affinity_write_proc; + } + + smp_affinity_entry[irq] = entry; + } +#endif +} + +unsigned long prof_cpu_mask = -1; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); + + if (!entry) + return; + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. + */ + for (i = 0; i < NR_IRQS; i++) + register_irq_proc(i); +} + diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/ldt.c new file mode 100644 index 0000000000..ca89b694bd --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/ldt.c @@ -0,0 +1,169 @@ +/* + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/ldt.h> +#include <asm/desc.h> + +/* + * read_ldt() is not really atomic - this is not a problem since + * synchronization of reads and writes done to the LDT has to be + * assured by user-space anyway. Writes are atomic, to protect + * the security checks done on new descriptors. 
+ */ +static int read_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct * mm = current->mm; + + err = 0; + if (!mm->context.segments) + goto out; + + size = LDT_ENTRIES*LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = size; + if (copy_to_user(ptr, mm->context.segments, size)) + err = -EFAULT; + out: + return err; +} + +static int read_default_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + void *address; + + err = 0; + address = &default_ldt[0]; + size = sizeof(struct desc_struct); + if (size > bytecount) + size = bytecount; + + err = size; + if (copy_to_user(ptr, address, size)) + err = -EFAULT; + + return err; +} + +static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct * mm = current->mm; + __u32 entry_1, entry_2, *lp; + unsigned long phys_lp; + int error; + struct modify_ldt_ldt_s ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + down_write(&mm->mmap_sem); + if (!mm->context.segments) { + void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + error = -ENOMEM; + if (!segments) + goto out_unlock; + memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE); + make_pages_readonly(segments, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + wmb(); + mm->context.segments = segments; + mm->context.cpuvalid = 1UL << smp_processor_id(); + load_LDT(mm); + flush_page_update_queue(); + } + + lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.segments); + phys_lp = arbitrary_virt_to_phys(lp); + + /* Allow LDTs to be cleared by the user. 
*/ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || + (ldt_info.contents == 0 && + ldt_info.read_exec_only == 1 && + ldt_info.seg_32bit == 0 && + ldt_info.limit_in_pages == 0 && + ldt_info.seg_not_present == 1 && + ldt_info.useable == 0 )) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + entry_2 = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000) >> 16) | + (ldt_info.limit & 0xf0000) | + ((ldt_info.read_exec_only ^ 1) << 9) | + (ldt_info.contents << 10) | + ((ldt_info.seg_not_present ^ 1) << 15) | + (ldt_info.seg_32bit << 22) | + (ldt_info.limit_in_pages << 23) | + 0x7000; + if (!oldmode) + entry_2 |= (ldt_info.useable << 20); + + /* Install the new entry ... */ + install: + HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2); + error = 0; + + out_unlock: + up_write(&mm->mmap_sem); + out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/process.c new file mode 100644 index 0000000000..3b17c7326c --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/process.c @@ -0,0 +1,489 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. 
+ */ + +#define __KERNEL_SYSCALLS__ +#include <stdarg.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/interrupt.h> +#include <linux/config.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/mc146818rtc.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/ldt.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/desc.h> +#include <asm/mmu_context.h> +#include <asm/multicall.h> +#include <asm/hypervisor-ifs/dom0_ops.h> + +#include <linux/irq.h> + +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +int hlt_counter; + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * The idle thread. 
There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle (void) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->nice = 20; + current->counter = -100; + + while (1) { + while (!current->need_resched) + HYPERVISOR_yield(); + schedule(); + check_pgt_cache(); + } +} + +void machine_restart(char * __unused) +{ + HYPERVISOR_exit(); +} + +void machine_halt(void) +{ + HYPERVISOR_exit(); +} + +void machine_power_off(void) +{ + HYPERVISOR_exit(); +} + +extern void show_trace(unsigned long* esp); + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); + if (regs->xcs & 2) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes); + + show_trace(®s->esp); +} + +/* + * No need to lock the MM as we are the last user + */ +void release_segments(struct mm_struct *mm) +{ + void * ldt = mm->context.segments; + + /* + * free the LDT + */ + if (ldt) { + mm->context.segments = NULL; + clear_LDT(); + make_pages_writeable(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + flush_page_update_queue(); + vfree(ldt); + } +} + +/* + * Create a kernel thread + */ +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? 
*/ + "je 1f\n\t" /* parent - jump */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. */ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | CLONE_VM) + : "memory"); + + return retval; +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* nothing to do ... */ +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + + /* + * Forget coprocessor state.. + */ + clear_fpu(tsk); + tsk->used_math = 0; +} + +void release_thread(struct task_struct *dead_task) +{ + if (dead_task->mm) { + void * ldt = dead_task->mm->context.segments; + + // temporary debugging check + if (ldt) { + printk("WARNING: dead process %8s still has LDT? <%p>\n", + dead_task->comm, ldt); + BUG(); + } + } +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +void copy_segments(struct task_struct *p, struct mm_struct *new_mm) +{ + struct mm_struct * old_mm; + void *old_ldt, *ldt; + + ldt = NULL; + old_mm = current->mm; + if (old_mm && (old_ldt = old_mm->context.segments) != NULL) { + /* + * Completely new LDT, we initialize it from the parent: + */ + ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if ( ldt == NULL ) + { + printk(KERN_WARNING "ldt allocation failed\n"); + } + else + { + memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + make_pages_readonly(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + } + } + new_mm->context.segments = ldt; + new_mm->context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */ +} + +/* + * Save a segment. 
+ */ +#define savesegment(seg,value) \ + asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) + +int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, + unsigned long unused, + struct task_struct * p, struct pt_regs * regs) +{ + struct pt_regs * childregs; + unsigned long eflags; + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + struct_cpy(childregs, regs); + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + + p->thread.eip = (unsigned long) ret_from_fork; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); + + unlazy_fpu(current); + struct_cpy(&p->thread.i387, ¤t->thread.i387); + + + __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : ); + p->thread.io_pl = (eflags >> 12) & 3; + + return 0; +} + +/* + * fill in the user structure for a core dump.. + */ +void dump_thread(struct pt_regs * regs, struct user * dump) +{ + int i; + +/* changed the size calculations - should hopefully work better. 
lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + for (i = 0; i < 8; i++) + dump->u_debugreg[i] = current->thread.debugreg[i]; + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; + + dump->regs.ebx = regs->ebx; + dump->regs.ecx = regs->ecx; + dump->regs.edx = regs->edx; + dump->regs.esi = regs->esi; + dump->regs.edi = regs->edi; + dump->regs.ebp = regs->ebp; + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; + savesegment(fs,dump->regs.fs); + savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; + dump->regs.eflags = regs->eflags; + dump->regs.esp = regs->esp; + dump->regs.ss = regs->xss; + + dump->u_fpvalid = dump_fpu (regs, &dump->i387); +} + +/* + * switch_to(x,yn) should switch tasks from x to y. + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. Lazy FP saving no longer makes any sense + * with modern CPU's, and this simplifies a lot of things (SMP + * and UP become the same). + * + * NOTE! We used to use the x86 hardware context switching. The + * reason for not using it any more becomes apparent when you + * try to recover gracefully from saved state that is no longer + * valid (stale segment register values in particular). With the + * hardware task-switch, there is no way to fix up bad state in + * a reasonable manner. + * + * The fact that Intel documents the hardware task-switching to + * be slow is a fairly red herring - this code is not noticeably + * faster. 
However, there _is_ some room for improvement here, + * so the performance issues may eventually be a valid point. + * More important, however, is the fact that this allows us much + * more flexibility. + */ +void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *next = &next_p->thread; + + __cli(); + + MULTICALL_flush_page_update_queue(); + + /* + * This is basically 'unlazy_fpu', except that we queue a multicall to + * indicate FPU task switch, rather than synchronously trapping to Xen. + */ + if ( prev_p->flags & PF_USEDFPU ) + { + if ( cpu_has_fxsr ) + asm volatile( "fxsave %0 ; fnclex" + : "=m" (prev_p->thread.i387.fxsave) ); + else + asm volatile( "fnsave %0 ; fwait" + : "=m" (prev_p->thread.i387.fsave) ); + prev_p->flags &= ~PF_USEDFPU; + queue_multicall0(__HYPERVISOR_fpu_taskswitch); + } + + queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0); + if ( start_info.flags & SIF_PRIVILEGED ) + { + dom0_op_t op; + op.cmd = DOM0_IOPL; + op.u.iopl.domain = start_info.dom_id; + op.u.iopl.iopl = next->io_pl; + queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op); + } + + /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */ + execute_multicall_list(); + __sti(); + + /* + * Restore %fs and %gs. 
+ */ + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + + /* + * Now maybe reload the debug registers + */ + if ( next->debugreg[7] != 0 ) + { + HYPERVISOR_set_debugreg(0, next->debugreg[0]); + HYPERVISOR_set_debugreg(1, next->debugreg[1]); + HYPERVISOR_set_debugreg(2, next->debugreg[2]); + HYPERVISOR_set_debugreg(3, next->debugreg[3]); + /* no 4 and 5 */ + HYPERVISOR_set_debugreg(6, next->debugreg[6]); + HYPERVISOR_set_debugreg(7, next->debugreg[7]); + } +} + +asmlinkage int sys_fork(struct pt_regs regs) +{ + return do_fork(SIGCHLD, regs.esp, ®s, 0); +} + +asmlinkage int sys_clone(struct pt_regs regs) +{ + unsigned long clone_flags; + unsigned long newsp; + + clone_flags = regs.ebx; + newsp = regs.ecx; + if (!newsp) + newsp = regs.esp; + return do_fork(clone_flags, newsp, ®s, 0); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +asmlinkage int sys_vfork(struct pt_regs regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); +} + +/* + * sys_execve() executes a new program. + */ +asmlinkage int sys_execve(struct pt_regs regs) +{ + int error; + char * filename; + + filename = getname((char *) regs.ebx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + out: + return error; +} + +/* + * These bracket the sleeping functions.. 
+ */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ebp, esp, eip; + unsigned long stack_page; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack_page = (unsigned long)p; + esp = p->thread.esp; + if (!stack_page || esp < stack_page || esp > 8188+stack_page) + return 0; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + ebp = *(unsigned long *) esp; + do { + if (ebp < stack_page || ebp > 8184+stack_page) + return 0; + eip = *(unsigned long *) (ebp+4); + if (eip < first_sched || eip >= last_sched) + return eip; + ebp = *(unsigned long *) ebp; + } while (count++ < 16); + return 0; +} +#undef last_sched +#undef first_sched diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/setup.c new file mode 100644 index 0000000000..b2532d7b58 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/setup.c @@ -0,0 +1,1046 @@ +/* + * linux/arch/i386/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/tty.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/apm_bios.h> +#ifdef CONFIG_BLK_DEV_RAM +#include <linux/blk.h> +#endif +#include <linux/highmem.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <asm/processor.h> +#include <linux/console.h> +#include 
<linux/module.h> +#include <asm/mtrr.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/msr.h> +#include <asm/desc.h> +#include <asm/dma.h> +#include <asm/mpspec.h> +#include <asm/mmu_context.h> +#include <asm/hypervisor.h> +#include <asm/hypervisor-ifs/dom0_ops.h> + +shared_info_t *HYPERVISOR_shared_info; + +unsigned long *phys_to_machine_mapping; + +/* + * Machine setup.. + */ + +char ignore_irq13; /* set if exception 16 works */ +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + +unsigned long mmu_cr4_features; +//EXPORT_SYMBOL(mmu_cr4_features); + +unsigned char * vgacon_mmap; + +/* + * Bus types .. + */ +#ifdef CONFIG_EISA +int EISA_bus; +#endif +int MCA_bus; + +/* for MCA, but anyone else can use it if they want */ +unsigned int machine_id; +unsigned int machine_submodel_id; +unsigned int BIOS_revision; +unsigned int mca_pentium_flag; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; + +/* + * Setup options + */ +struct drive_info_struct { char dummy[32]; } drive_info; +struct screen_info screen_info; +struct apm_info apm_info; +struct sys_desc_table_struct { + unsigned short length; + unsigned char table[0]; +}; + +unsigned char aux_device_present; + +extern int root_mountflags; +extern char _text, _etext, _edata, _end; + +int enable_acpi_smp_table; + +/* Raw start-of-day parameters from the hypervisor. 
*/ +union start_info_union start_info_union; + +#define COMMAND_LINE_SIZE 256 +static char command_line[COMMAND_LINE_SIZE]; +char saved_command_line[COMMAND_LINE_SIZE]; + +static void __init parse_mem_cmdline (char ** cmdline_p) +{ + char c = ' ', *to = command_line, *from = saved_command_line; + int len = 0; + + /* Save unparsed command line copy for /proc/cmdline */ + memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + + for (;;) { + /* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to <mem>, overriding the bios size. + * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * <start> to <start>+<mem>, overriding the bios size. + */ + if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (to != command_line) + to--; + if (!memcmp(from+4, "nopentium", 9)) { + from += 9+4; + } else if (!memcmp(from+4, "exactmap", 8)) { + from += 8+4; + } else { + (void)memparse(from+4, &from); + if (*from == '@') + (void)memparse(from+1, &from); + } + } + + c = *(from++); + if (!c) + break; + if (COMMAND_LINE_SIZE <= ++len) + break; + *(to++) = c; + } + *to = '\0'; + *cmdline_p = command_line; +} + +void __init setup_arch(char **cmdline_p) +{ + unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long bootmap_size; + unsigned long i; + + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + + extern unsigned long cpu0_pte_quicklist[]; + extern unsigned long cpu0_pgd_quicklist[]; + + HYPERVISOR_set_callbacks( + __KERNEL_CS, (unsigned long)hypervisor_callback, + __KERNEL_CS, (unsigned long)failsafe_callback); + + boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; + boot_cpu_data.pte_quick = cpu0_pte_quicklist; + + ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); + memset(&drive_info, 0, sizeof(drive_info)); + memset(&screen_info, 0, sizeof(screen_info)); + + /* This is drawn from a dump from vgacon:startup in standard Linux. 
*/ + screen_info.orig_video_mode = 3; + screen_info.orig_video_isVGA = 1; + screen_info.orig_video_lines = 25; + screen_info.orig_video_cols = 80; + screen_info.orig_video_ega_bx = 3; + screen_info.orig_video_points = 16; + + memset(&apm_info.bios, 0, sizeof(apm_info.bios)); + aux_device_present = 0; +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = 0; + rd_prompt = 0; + rd_doload = 0; +#endif + + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + parse_mem_cmdline(cmdline_p); + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +/* + * 128MB for vmalloc and initrd + */ +#define VMALLOC_RESERVE (unsigned long)(128 << 20) +#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ +#ifdef CONFIG_BLK_DEV_INITRD + if ( start_info.mod_start ) + start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len)); + else +#endif + start_pfn = PFN_UP(__pa(&_end)); + max_pfn = start_info.nr_pages; + + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_X86_PAE + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used.\n"); + printk(KERN_WARNING "Use a PAE enabled 
kernel.\n"); + } +#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM */ + } + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > MAXMEM_PFN) { + highstart_pfn = MAXMEM_PFN; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + } +#endif + + /* + * Initialize the boot-time allocator, and free up all RAM. + * Then reserve space for OS image, and the bootmem bitmap. + */ + bootmap_size = init_bootmem(start_pfn, max_low_pfn); + free_bootmem(0, PFN_PHYS(max_low_pfn)); + reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1); + + /* Now reserve space for the hypervisor-provided page tables. */ + { + unsigned long *pgd = (unsigned long *)start_info.pt_base; + unsigned long pte; + int i; + reserve_bootmem(__pa(pgd), PAGE_SIZE); + for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ ) + { + unsigned long pgde = *pgd++; + if ( !(pgde & 1) ) continue; + pte = machine_to_phys(pgde & PAGE_MASK); + reserve_bootmem(pte, PAGE_SIZE); + } + } + cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base; + + /* Now initialise the physical->machine mapping table. 
*/ + phys_to_machine_mapping = alloc_bootmem(max_pfn * sizeof(unsigned long)); + for ( i = 0; i < max_pfn; i++ ) + { + unsigned long pgde, *ppte; + unsigned long pfn = i + (PAGE_OFFSET >> PAGE_SHIFT); + pgde = *((unsigned long *)start_info.pt_base + (pfn >> 10)); + ppte = (unsigned long *)machine_to_phys(pgde & PAGE_MASK) + (pfn&1023); + phys_to_machine_mapping[i] = + (*(unsigned long *)__va(ppte)) >> PAGE_SHIFT; + } + +#ifdef CONFIG_BLK_DEV_INITRD + if (start_info.mod_start) { + if ((__pa(start_info.mod_start) + start_info.mod_len) <= + (max_low_pfn << PAGE_SHIFT)) { + initrd_start = start_info.mod_start; + initrd_end = initrd_start + start_info.mod_len; + initrd_below_start_ok = 1; + } + else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + __pa(start_info.mod_start) + start_info.mod_len, + max_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } + } +#endif + + paging_init(); + + /* We are privileged guest os - should have IO privileges. 
*/ + if ( start_info.flags & SIF_PRIVILEGED ) + { + dom0_op_t op; + op.cmd = DOM0_IOPL; + op.u.iopl.domain = start_info.dom_id; + op.u.iopl.iopl = 1; + if( HYPERVISOR_dom0_op(&op) != 0 ) + panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED"); + current->thread.io_pl = 1; + } + + if(start_info.flags & SIF_CONSOLE) + { + if( !(start_info.flags & SIF_PRIVILEGED) ) + panic("Xen granted us console access but not privileged status"); + +#if defined(CONFIG_VT) +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif + } +} + +static int cachesize_override __initdata = -1; +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + + +static int __init get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +static void __init display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. 
*/ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* AMD errata T13 (order #21922) */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + l2size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + l2size = 256; + } + + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && + (c->x86_model == 11) && (l2size == 0)) + l2size = 256; + + if (c->x86_vendor == X86_VENDOR_CENTAUR) { + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && + ((c->x86_model == 7) || (c->x86_model == 8))) { + l2size >>= 8; + } + + /* VIA also screwed up Nehemiah stepping 1, and made + it return '65KB' instead of '64KB' + - Note, it seems this may only be in engineering samples. */ + if ((c->x86==6) && (c->x86_model==9) && + (c->x86_mask==1) && (l2size==65)) + l2size -= 1; + } + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + + +static int __init init_amd(struct cpuinfo_x86 *c) +{ + int r; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 6: /* An Athlon/Duron. 
We can trust the BIOS probably */ + break; + default: + panic("Unsupported AMD processor\n"); + } + + display_cacheinfo(c); + return r; +} + + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + char *p = NULL; + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + + if (c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char dl, dh; + unsigned int cs; + + dh = des >> 4; + dl = des & 0x0F; + + /* Black magic... */ + + switch ( dh ) + { + case 0: + switch ( dl ) { + case 6: + /* L1 I cache */ + l1i += 8; + break; + case 8: + /* L1 I cache */ + l1i += 16; + break; + case 10: + /* L1 D cache */ + l1d += 8; + break; + case 12: + /* L1 D cache */ + l1d += 16; + break; + default:; + /* TLB, or unknown */ + } + break; + case 2: + if ( dl ) { + /* L3 cache */ + cs = (dl-1) << 9; + l3 += cs; + } + break; + case 4: + if ( c->x86 > 6 && dl ) { + /* P4 family */ + /* L3 cache */ + cs = 128 << (dl-1); + l3 += cs; + break; + } + /* else same as 8 - fall through */ + case 8: + if ( dl ) { + /* L2 cache */ + cs = 128 << (dl-1); + l2 += cs; + } + break; + case 6: + if (dl > 5) { + /* L1 D cache */ + cs = 8<<(dl-6); + l1d += cs; + } + break; + case 7: + if ( dl >= 8 ) + { + /* L2 cache */ + cs = 64<<(dl-8); + l2 += cs; + } else { + /* L0 I cache, count as L1 */ + cs = dl ? 
(16 << (dl-1)) : 12; + l1i += cs; + } + break; + default: + /* TLB, or something else we don't know about */ + break; + } + } + } + if ( l1i || l1d ) + printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", + l1i, l1d); + if ( l2 ) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + if ( l3 ) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + /* + * This assumes the L3 cache is shared; it typically lives in + * the northbridge. The L1 caches are included by the L2 + * cache, and so should not be included for the purpose of + * SMP switching weights. + */ + c->x86_cache_size = l2 ? l2 : (l1i+l1d); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, &c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (l2 == 0) + p = "Celeron (Covington)"; + if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); +} + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* Naming convention should be: <Name> [(<Codename>)] */ +/* This table only is used unless init_<vendor>() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ +static struct cpu_model_info cpu_models[] __initdata = { + { X86_VENDOR_INTEL, 6, + { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II 
(Klamath)", + NULL, "Pentium II (Deschutes)", "Mobile Pentium II", + "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, + "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, + { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ + { "Athlon", "Athlon", + "Athlon", NULL, "Athlon", NULL, + NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL }} +}; + +/* Look up CPU names by table lookup. */ +static char __init *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info = cpu_models; + int i; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { + if ( info->vendor == c->x86_vendor && + info->family == c->x86 ) { + return info->model_names[c->x86_model]; + } + info++; + } + return NULL; /* Not found */ +} + + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + + + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int junk, i; + u32 xlvl, tfms; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->hard_math = 1; + + if ( !have_cpuid_p() ) { + panic("Processor must support CPUID\n"); + } else { + /* CPU does have CPUID */ + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + cpuid(0x00000001, &tfms, &junk, &junk, + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + } + + printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! 
+ */ + switch ( c->x86_vendor ) { + case X86_VENDOR_AMD: + init_amd(c); + break; + + case X86_VENDOR_INTEL: + init_intel(c); + break; + + default: + panic("Unsupported CPU vendor\n"); + } + + printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86_vendor, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. 
+ */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", + boot_cpu_data.x86_capability[0], + boot_cpu_data.x86_capability[1], + boot_cpu_data.x86_capability[2], + boot_cpu_data.x86_capability[3]); +} + + +/* These need to match <asm/processor.h> */ +static char *cpu_vendor_names[] __initdata = { + "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; + + +void __init print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) + vendor = cpu_vendor_names[c->x86_vendor]; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in <asm/cpufeature.h>. + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu/<cpu_nr>/cpuid instead. 
+ */ + static char *x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL, + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!(cpu_online_map & (1<<n))) + return 0; +#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + n, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, &c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, +}; + +unsigned long cpu_initialized __initdata = 0; + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. 
We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __init cpu_init (void) +{ + int nr = smp_processor_id(); + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + for (;;) __sti(); + } + printk(KERN_INFO "Initializing CPU#%d\n", nr); + + /* + * set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if(current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current, nr); + + HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); + + load_LDT(&init_mm); + flush_page_update_queue(); + + /* Force FPU initialization. */ + current->flags &= ~PF_USEDFPU; + current->used_math = 0; + stts(); +} + + +/****************************************************************************** + * Time-to-die callback handling. + */ + +static void time_to_die(int irq, void *unused, struct pt_regs *regs) +{ + extern void ctrl_alt_del(void); + ctrl_alt_del(); +} + +static int __init setup_death_event(void) +{ + (void)request_irq(_EVENT_DIE, time_to_die, 0, "die", NULL); + return 0; +} + +__initcall(setup_death_event); diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/signal.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/signal.c new file mode 100644 index 0000000000..f646c5c0ca --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/signal.c @@ -0,0 +1,717 @@ +/* + * linux/arch/i386/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include 
<linux/personality.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/i387.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. */ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + /* case __SI_RT: This is not generated by the kernel as of now. */ + } + return err; + } +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. 
+ */ +asmlinkage int +sys_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + struct pt_regs * regs = (struct pt_regs *) &history0; + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +{ + struct pt_regs * regs = (struct pt_regs *) &unewset; + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage int +sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + struct pt_regs *regs = (struct pt_regs *) &uss; + return do_sigaltstack(uss, uoss, regs->esp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + char *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char *pretcode; + int sig; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) +{ + unsigned int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + +#define COPY_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp; } + +#define COPY_SEG_STRICT(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp|3; } + +#define GET_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg,tmp); } + + GET_SEG(gs); + GET_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(edi); + COPY(esi); + COPY(ebp); + COPY(esp); + COPY(ebx); + COPY(edx); + COPY(ecx); + COPY(eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (verify_area(VERIFY_READ, 
buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } + + err |= __get_user(*peax, &sc->eax); + return err; + +badframe: + return 1; +} + +asmlinkage int sys_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); + sigset_t set; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && __copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); + sigset_t set; + stack_t st; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack(&st, NULL, regs->esp); + + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. 
+ */ + +static int +setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int tmp, err = 0; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->gs); + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->fs); + + err |= __put_user(regs->xes, (unsigned int *)&sc->es); + err |= __put_user(regs->xds, (unsigned int *)&sc->ds); + err |= __put_user(regs->edi, &sc->edi); + err |= __put_user(regs->esi, &sc->esi); + err |= __put_user(regs->ebp, &sc->ebp); + err |= __put_user(regs->esp, &sc->esp); + err |= __put_user(regs->ebx, &sc->ebx); + err |= __put_user(regs->edx, &sc->edx); + err |= __put_user(regs->ecx, &sc->ecx); + err |= __put_user(regs->eax, &sc->eax); + err |= __put_user(current->thread.trap_no, &sc->trapno); + err |= __put_user(current->thread.error_code, &sc->err); + err |= __put_user(regs->eip, &sc->eip); + err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); + err |= __put_user(regs->eflags, &sc->eflags); + err |= __put_user(regs->esp, &sc->esp_at_signal); + err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. */ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->thread.cr2, &sc->cr2); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + + /* Default to using normal stack */ + esp = regs->esp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(esp) == 0) + esp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. 
*/ + else if ((regs->xss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + esp = (unsigned long) ka->sa.sa_restorer; + } + + return (void *)((esp - frame_size) & -8ul); +} + +static void setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + if (err) + goto give_sigsegv; + + err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + err |= __put_user(0xb858, (short *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short *)(frame->retcode+6)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto give_sigsegv; + + /* Create the ucontext. 
*/ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->esp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + err |= __put_user(0xb8, (char *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short *)(frame->retcode+5)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +/* + * OK, we're invoking a handler + */ + +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +{ + /* Are we from a system call? */ + if (regs->orig_eax >= 0) { + /* If so, check system call restarting.. 
*/ + switch (regs->eax) { + case -ERESTARTNOHAND: + regs->eax = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->eax = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else + setup_frame(sig, ka, oldset, regs); + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset) +{ + siginfo_t info; + struct k_sigaction *ka; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if ((regs->xcs & 2) != 2) + return 1; + + if (!oldset) + oldset = ¤t->blocked; + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? 
*/ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: { + struct signal_struct *sig; + current->state = TASK_STOPPED; + current->exit_code = signr; + sig = current->p_pptr->sig; + if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + } + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sig_exit(signr, exit_code, &info); + /* NOTREACHED */ + } + } + + /* Reenable any watchpoints before delivering the + * signal to user space. The processor register will + * have been cleared if the watchpoint triggered + * inside the kernel. + */ + if ( current->thread.debugreg[7] != 0 ) + HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, ka, &info, oldset, regs); + return 1; + } + + /* Did we come from a system call? 
*/ + if (regs->orig_eax >= 0) { + /* Restart the system call - no handlers present */ + if (regs->eax == -ERESTARTNOHAND || + regs->eax == -ERESTARTSYS || + regs->eax == -ERESTARTNOINTR) { + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + return 0; +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c new file mode 100644 index 0000000000..73ac82c9a4 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c @@ -0,0 +1,323 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: arch.xeno/time.c + * Author: Rolf Neugebauer + * Changes: + * + * Date: Nov 2002 + * + * Environment: XenoLinux + * Description: Interface with Hypervisor to get correct notion of time + * Currently supports Systemtime and WallClock time. + * + * (This has hardly any resemblence with the Linux code but left the + * copyright notice anyway. Ignore the comments in the copyright notice.) + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * This file contains the PC-specific time handling details: + * reading the RTC at bootup, etc.. 
+ * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1995-03-26 Markus Kuhn + * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 + * precision CMOS clock update + * 1996-05-03 Ingo Molnar + * fixed time warps in do_[slow|fast]_gettimeoffset() + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-09-05 (Various) + * More robust do_fast_gettimeoffset() algorithm implemented + * (works with APM, Cyrix 6x86MX and Centaur C6), + * monotonic gettimeofday() with fast_get_timeoffset(), + * drift-proof precision TSC calibration on boot + * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D. + * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>; + * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>). + * 1998-12-16 Andrea Arcangeli + * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy + * because was not accounting lost_ticks. + * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli + * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). 
+ */ + +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/msr.h> +#include <asm/delay.h> +#include <asm/mpspec.h> +#include <asm/uaccess.h> +#include <asm/processor.h> + +#include <asm/div64.h> +#include <asm/hypervisor.h> + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/irq.h> + +#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */ + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t xtime_lock; + +unsigned long cpu_khz; /* get this from Xen, used elsewhere */ +static spinlock_t hyp_time_lock = SPIN_LOCK_UNLOCKED; + +static unsigned int rdtsc_bitshift; +static u32 st_scale_f; +static u32 st_scale_i; +static u32 shadow_st_pcc; +static s64 shadow_st; + +/* + * System time. + * Although the rest of the Linux kernel doesn't know about this, we + * we use it to extrapolate passage of wallclock time. + * We need to read the values from the shared info page "atomically" + * and use the cycle counter value as the "version" number. Clashes + * should be very rare. + */ +static inline s64 __get_s_time(void) +{ + s32 delta_tsc; + u32 low; + u64 delta, tsc; + + rdtscll(tsc); + low = (u32)(tsc >> rdtsc_bitshift); + delta_tsc = (s32)(low - shadow_st_pcc); + if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + return shadow_st + delta; +} + +/* + * Wallclock time. + * Based on what the hypervisor tells us, extrapolated using system time. + * Again need to read a number of values from the shared page "atomically". + * this time using a version number. 
+ */ +static u32 shadow_wc_version=0; +static long shadow_tv_sec; +static long shadow_tv_usec; +static long long shadow_wc_timestamp; +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags; + long usec, sec; + u32 version; + u64 now, cpu_freq, scale; + + spin_lock_irqsave(&hyp_time_lock, flags); + + while ( (version = HYPERVISOR_shared_info->wc_version) != + shadow_wc_version ) + { + barrier(); + + shadow_wc_version = version; + shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; + shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; + shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; + shadow_st_pcc = HYPERVISOR_shared_info->st_timestamp; + shadow_st = HYPERVISOR_shared_info->system_time; + + rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift; + cpu_freq = HYPERVISOR_shared_info->cpu_freq; + + /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! */ + scale = 1000000000LL << (32 + rdtsc_bitshift); + do_div(scale,(u32)cpu_freq); + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + + barrier(); + } + + now = __get_s_time(); + usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; + sec = shadow_tv_sec; + usec += shadow_tv_usec; + + while ( usec >= 1000000 ) + { + usec -= 1000000; + sec++; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; + + spin_unlock_irqrestore(&hyp_time_lock, flags); + +#ifdef XENO_TIME_DEBUG + { + static long long old_now=0; + static long long wct=0, old_wct=0; + + /* This debug code checks that time increases over two subsequent calls */ + wct=(((long long)sec) * 1000000) + usec; + /* wall clock time going backwards */ + if ((wct < old_wct) ) { + printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n", + (long)(wct-old_wct), usec, usec); + printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + + /* system time going backwards */ + if (now<=old_now) { + printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + old_wct = wct;
+ old_now = now; + } +#endif +} + +void do_settimeofday(struct timeval *tv) +{ +/* XXX RN: should do something special here for dom0 */ +#if 0 + write_lock_irq(&xtime_lock); + /* + * This is revolting. We need to set "xtime" correctly. However, the + * value in this location is the value at the most recent update of + * wall time. Discover what correction gettimeofday() would have + * made, and then undo it! + */ + tv->tv_usec -= do_gettimeoffset(); + tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + + while ( tv->tv_usec < 0 ) + { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + write_unlock_irq(&xtime_lock); +#endif +} + + +/* + * Timer ISR. + * Unlike normal Linux these don't come in at a fixed rate of HZ. + * In here we work out how often it should have been called and then call + * the architecture independent part (do_timer()) the appropriate number of + * times. A bit of a nasty hack, to keep the "other" notion of wallclock time + * happy. + */ +static long long us_per_tick=1000000/HZ; +static long long last_irq; +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + struct timeval tv; + long long time, delta; + + /* + * The next bit really sucks: + * Linux not only uses do_gettimeofday() to keep a notion of + * wallclock time, but also maintains the xtime struct and jiffies. + * (Even worse, some userland code accesses this via the sys_time() + * system call) + * Unfortunately, xtime is maintained in the architecture independent + * part of the timer ISR (./kernel/timer.c sic!). So, although we have + * a perfectly valid notion of wallclock time from the hypervisor we here + * fake missed timer interrupts so that the arch independent part of + * the Timer ISR updates jiffies for us *and* once the bh gets run + * updates xtime accordingly. Yuck!
+ */ + + /* Work out the number of jiffy intervals passed and update them. */ + do_gettimeofday(&tv); + time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; + delta = time - last_irq; + if (delta <= 0) { + printk ("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + while (delta >= us_per_tick) { + do_timer(regs); + delta -= us_per_tick; + last_irq += us_per_tick; + } + +#if 0 + if (!user_mode(regs)) + x86_do_profile(regs->eip); +#endif +} + +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + write_lock(&xtime_lock); + do_timer_interrupt(irq, NULL, regs); + write_unlock(&xtime_lock); +} + +static struct irqaction irq_timer = { + timer_interrupt, + SA_INTERRUPT, + 0, + "timer", + NULL, + NULL +}; + +void __init time_init(void) +{ + unsigned long long alarm; + u64 __cpu_khz; + + __cpu_khz = HYPERVISOR_shared_info->cpu_freq; + do_div(__cpu_khz, 1000); + cpu_khz = (u32)__cpu_khz; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + do_gettimeofday(&xtime); + last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec; + + setup_irq(TIMER_IRQ, &irq_timer); + + /* + * Start ticker. Note that timing runs off wall clock, not virtual 'domain' + * time. This means that the clock should run at the correct rate. For things + * like scheduling, it's not clear whether it matters which sort of time + * we use. XXX RN: unimplemented.
+ */ + + rdtscll(alarm); +#if 0 + alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; + HYPERVISOR_shared_info->wall_timeout = alarm; + HYPERVISOR_shared_info->domain_timeout = ~0ULL; +#endif + clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c new file mode 100644 index 0000000000..85c6acb8f0 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/traps.c @@ -0,0 +1,600 @@ +/* + * linux/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. + */ +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/atomic.h> +#include <asm/debugreg.h> +#include <asm/desc.h> +#include <asm/i387.h> + +#include <asm/smp.h> +#include <asm/pgalloc.h> + +#include <asm/hypervisor.h> + +#include <linux/irq.h> +#include <linux/module.h> + +asmlinkage int system_call(void); +asmlinkage void lcall7(void); +asmlinkage void lcall27(void); + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 24; + + +/* + * If the address is either in the .text section of the + * kernel, or in the vmalloc'ed module regions, it *may* + * be the address of a calling routine + */ + +#ifdef CONFIG_MODULES + +extern struct module *module_list; +extern struct module kernel_module; + +static inline int kernel_text_address(unsigned long addr) +{ + int retval = 0; + struct module *mod; + + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + /* mod_bound tests for addr being inside the vmalloc'ed + * module area. Of course it'd be better to test only + * for the .text subset... */ + if (mod_bound(addr, 0, mod)) { + retval = 1; + break; + } + } + + return retval; +} + +#else + +static inline int kernel_text_address(unsigned long addr) +{ + return (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext); +} + +#endif + +void show_trace(unsigned long * stack) +{ + int i; + unsigned long addr; + + if (!stack) + stack = (unsigned long*)&stack; + + printk("Call Trace: "); + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_trace_task(struct task_struct *tsk) +{ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU?
*/ + if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) + return; + show_trace((unsigned long *)esp); +} + +void show_stack(unsigned long * esp) +{ + unsigned long *stack; + int i; + + // debugging aid: "show_stack(NULL);" prints the + // back trace for this cpu. + + if(esp==NULL) + esp=(unsigned long*)&esp; + + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + show_trace(esp); +} + +void show_registers(struct pt_regs *regs) +{ + int in_kernel = 1; + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (&regs->esp); + ss = __KERNEL_DS; + if (regs->xcs & 2) { + in_kernel = 0; + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk("Process %s (pid: %d, stackpage=%08lx)", + current->comm, current->pid, 4096+(unsigned long)current); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault..
+ */ + if (in_kernel) { + + printk("\nStack: "); + show_stack((unsigned long*)esp); + +#if 0 + { + int i; + printk("\nCode: "); + if(regs->eip < PAGE_OFFSET) + goto bad; + + for(i=0;i<20;i++) + { + unsigned char c; + if(__get_user(c, &((unsigned char*)regs->eip)[i])) { +bad: + printk(" Bad EIP value."); + break; + } + printk("%02x ", c); + } + } +#endif + } + printk("\n"); +} + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04lx\n", str, err & 0xffff); + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +{ + if (!(2 & regs->xcs)) + die(str, regs, err); +} + + +static void inline do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) +{ + if (!(regs->xcs & 2)) + goto kernel_trap; + + /*trap_signal:*/ { + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; + } + + kernel_trap: { + unsigned long fixup = search_exception_table(regs->eip); + if (fixup) + regs->eip = fixup; + else + die(str, regs, error_code); + return; + } +} + +#define DO_ERROR(trapnr, signr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ +} + +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void *)siaddr; \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ +} + +DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV,
regs->eip) +DO_ERROR( 3, SIGTRAP, "int3", int3) +DO_ERROR( 4, SIGSEGV, "overflow", overflow) +DO_ERROR( 5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) +DO_ERROR( 8, SIGSEGV, "double fault", double_fault) +DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, SIGBUS, "machine check", machine_check) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + if (!(regs->xcs & 2)) + goto gp_in_kernel; + + current->thread.error_code = error_code; + current->thread.trap_no = 13; + force_sig(SIGSEGV, current); + return; + +gp_in_kernel: + { + unsigned long fixup; + fixup = search_exception_table(regs->eip); + if (fixup) { + regs->eip = fixup; + return; + } + die("general protection fault", regs, error_code); + } +} + + +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + siginfo_t info; + + condition = HYPERVISOR_get_debugreg(6); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg[7]) + goto clear_dr7; + } + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg[6] = condition; + + /* Mask out spurious TF errors due to lazy TF clearing */ + if (condition & DR_STEP) { + /* + * The TF error should be masked out only if the current + * process is not traced and if the TRAP flag has been set + * previously by a tracing process (condition detected by + * the PT_DTRACE flag); remember that the i386 TRAP flag + * can be modified by
the process itself in user mode, + * allowing programs to debug themselves without the ptrace() + * interface. + */ + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } + + /* Ok, finally something we can handle */ + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ + info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : + (void *)regs->eip; + force_sig_info(SIGTRAP, &info, tsk); + + /* Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ + clear_dr7: + HYPERVISOR_set_debugreg(7, 0); + return; + + clear_TF: + regs->eflags &= ~TF_MASK; + return; +} + + +/* + * Note that we play around with the 'TS' bit in an attempt to get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short cwd, swd; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 16; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit.
We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); + switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + case 0x040: /* Stack Fault */ + case 0x240: /* Stack Fault | Direction */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + ignore_irq13 = 1; + math_error((void *)regs->eip); +} + +void simd_math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short mxcsr; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 19; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ */ + mxcsr = get_fpu_mxcsr(task); + switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, + long error_code) +{ + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_irq13 = 1; + simd_math_error((void *)regs->eip); + } else { + die_if_kernel("cache flush denied", regs, error_code); + current->thread.trap_no = 19; + current->thread.error_code = error_code; + force_sig(SIGSEGV, current); + } +} + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ +} + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works.
+ */ +asmlinkage void math_state_restore(struct pt_regs regs) +{ + if (current->used_math) { + restore_fpu(current); + } else { + init_fpu(); + } + current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ +} + + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ +} while (0) + +static void __init set_call_gate(void *a, void *addr) +{ + _set_gate(a,12,3,addr); +} + + +/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ +static trap_info_t trap_table[] = { + { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, __KERNEL_CS, (unsigned long)debug }, + { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3, __KERNEL_CS, (unsigned long)overflow }, + { 5, 3, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, + { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, + { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, + { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { SYSCALL_VECTOR, + 3, __KERNEL_CS, (unsigned long)system_call }, + { 0, 0, 0, 0 } +}; + + + +void
__init trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); + HYPERVISOR_set_fast_trap(SYSCALL_VECTOR); + + /* + * The default LDT is a single-entry callgate to lcall7 for iBCS and a + * callgate to lcall27 for Solaris/x86 binaries. + */ + clear_page(&default_ldt[0]); + set_call_gate(&default_ldt[0],lcall7); + set_call_gate(&default_ldt[4],lcall27); + __make_page_readonly(&default_ldt[0]); + + cpu_init(); +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/lib/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/lib/Makefile new file mode 100644 index 0000000000..2224f0312c --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/lib/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -c $< -o $*.o + +L_TARGET = lib.a + +obj-y = checksum.o old-checksum.o delay.o \ + usercopy.o getuser.o \ + memcpy.o strstr.o + +obj-$(CONFIG_X86_USE_3DNOW) += mmx.o +obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/lib/delay.c b/xenolinux-2.4.22-sparse/arch/xeno/lib/delay.c new file mode 100644 index 0000000000..0035bed074 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/lib/delay.c @@ -0,0 +1,52 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPUs + * we have to worry about.
+ */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <asm/processor.h> +#include <asm/delay.h> + +#ifdef CONFIG_SMP +#include <asm/smp.h> +#endif + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); /* "=d": xloops becomes the high 32 bits of xloops*loops_per_jiffy */ + __delay(xloops * HZ); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.22-sparse/arch/xeno/mm/Makefile new file mode 100644 index 0000000000..d0d16114b6 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/mm/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the linux i386-specific parts of the memory manager. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile...
+ +O_TARGET := mm.o + +obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o + +export-objs := pageattr.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.22-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.22-sparse/arch/xeno/mm/fault.c new file mode 100644 index 0000000000..d4d24d2085 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/mm/fault.c @@ -0,0 +1,325 @@ +/* + * linux/arch/i386/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> /* For unblank_screen() */ + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#include <asm/hardirq.h> + +extern void die(const char *,struct pt_regs *,long); + +pgd_t *cur_pgd; /* pagetable root currently installed (see vmalloc_fault in do_page_fault) */ + +extern spinlock_t timerlist_lock; + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out (timerlist_lock is acquired through the + * console unblank code) + */ +void bust_spinlocks(int yes) +{ + spin_lock_init(&timerlist_lock); /* forcibly re-initialise (i.e. release) so the oops text can get out */ + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; + } +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines.
+ * + * error_code: + * bit 0 == 0 means no page found, 1 means protection fault + * bit 1 == 0 means read, 1 means write + * bit 2 == 0 means kernel, 1 means user-mode + */ +asmlinkage void do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long page; + unsigned long fixup; + int write; + siginfo_t info; + + /* Set the "privileged fault" bit to something sane. */ + error_code &= 3; + error_code |= (regs->xcs & 2) << 1; /* set bit 2 if the fault came from user mode (ring != 0) */ + +#if PT_UPDATE_DEBUG > 0 + if ( (error_code == 0) && (address >= TASK_SIZE) ) + { + unsigned long paddr = __pa(address); + int i; + for ( i = 0; i < pt_update_queue_idx; i++ ) + { + if ( update_debug_queue[i].ptr == paddr ) + { + printk("XXX now(EIP=%08lx:ptr=%08lx) " + "then(%s/%d:p/v=%08lx/%08lx)\n", + regs->eip, address, + update_debug_queue[i].file, + update_debug_queue[i].line, + update_debug_queue[i].ptr, + update_debug_queue[i].val); + } + } + } +#endif + + if ( flush_page_update_queue() != 0 ) return; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. + */ + if (address >= TASK_SIZE && !(error_code & 5)) + goto vmalloc_fault; + + mm = tsk->mm; + info.si_code = SEGV_MAPERR; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault..
+ */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (error_code & 4) { + /* + * accessing the stack below %esp is always a bug. + * The "+ 32" is there due to some instructions (like + * pusha) doing post-decrement on the stack and that + * doesn't show up until later.. + */ + if (address + 32 < regs->esp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ + /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + write++; + break; + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case 1: + tsk->min_flt++; + break; + case 2: + tsk->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first..
+ */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void *)address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->eip)) != 0) { + regs->eip = fixup; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n",address); + printk(" printing eip:\n"); + printk("%08lx\n", regs->eip); + page = ((unsigned long *) cur_pgd)[address >> 22]; + printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); + if (page & 1) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = machine_to_phys(page); + page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; + printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, + machine_to_phys(page)); + } + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + if (tsk->pid == 1) { + yield(); + goto survive; + } + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", tsk->comm); + if (error_code & 4) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode.
+ */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + force_sig_info(SIGBUS, &info, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & 4)) + goto no_context; + return; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = __pgd_offset(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = offset + cur_pgd; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + XENO_flush_page_update_queue(); /* flush PMD update */ + + pte_k = pte_offset(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.22-sparse/arch/xeno/mm/hypervisor.c new file mode 100644 index 0000000000..1935595307 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/mm/hypervisor.c @@ -0,0 +1,238 @@ +/****************************************************************************** + * xeno/mm/hypervisor.c + * + * Update page tables via the hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <asm/hypervisor.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/multicall.h> + +/* + * This suffices to protect us if we ever move to SMP domains. + * Further, it protects us against interrupts. At the very least, this is + * required for the network driver which flushes the update queue before + * pushing new receive buffers.
+ */ +static spinlock_t update_lock = SPIN_LOCK_UNLOCKED; + +#define QUEUE_SIZE 2048 +static page_update_request_t update_queue[QUEUE_SIZE]; +unsigned int pt_update_queue_idx = 0; +#define idx pt_update_queue_idx /* local shorthand for the queue index */ + +#if PT_UPDATE_DEBUG > 0 +page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}}; +#undef queue_l1_entry_update +#undef queue_l2_entry_update +static void DEBUG_allow_pt_reads(void) +{ + pte_t *pte; + page_update_request_t update; + int i; + for ( i = idx-1; i >= 0; i-- ) + { + pte = update_debug_queue[i].ptep; + if ( pte == NULL ) continue; + update_debug_queue[i].ptep = NULL; + update.ptr = phys_to_machine(__pa(pte)); + update.val = update_debug_queue[i].pteval; + HYPERVISOR_pt_update(&update, 1); + } +} +static void DEBUG_disallow_pt_read(unsigned long pa) +{ + pte_t *pte; + pmd_t *pmd; + pgd_t *pgd; + unsigned long pteval; + /* + * We may fault because of an already outstanding update. + * That's okay -- it'll get fixed up in the fault handler. + */ + page_update_request_t update; + unsigned long va = (unsigned long)__va(pa); + pgd = pgd_offset_k(va); + pmd = pmd_offset(pgd, va); + pte = pte_offset(pmd, va); + update.ptr = phys_to_machine(__pa(pte)); + pteval = *(unsigned long *)pte; + update.val = pteval & ~_PAGE_PRESENT; + HYPERVISOR_pt_update(&update, 1); + update_debug_queue[idx].ptep = pte; + update_debug_queue[idx].pteval = pteval; +} +#endif + +#if PT_UPDATE_DEBUG > 1 +#undef queue_pt_switch +#undef queue_tlb_flush +#undef queue_invlpg +#undef queue_pgd_pin +#undef queue_pgd_unpin +#undef queue_pte_pin +#undef queue_pte_unpin +#endif + + +/* + * MULTICALL_flush_page_update_queue: + * This is a version of the flush which queues as part of a multicall.
+ */ +void MULTICALL_flush_page_update_queue(void) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + if ( idx != 0 ) + { +#if PT_UPDATE_DEBUG > 1 + printk("Flushing %d entries from pt update queue\n", idx); +#endif +#if PT_UPDATE_DEBUG > 0 + DEBUG_allow_pt_reads(); +#endif + queue_multicall2(__HYPERVISOR_pt_update, (unsigned long)update_queue, idx); + idx = 0; + } + spin_unlock_irqrestore(&update_lock, flags); +} + +static inline void __flush_page_update_queue(void) +{ +#if PT_UPDATE_DEBUG > 1 + printk("Flushing %d entries from pt update queue\n", idx); +#endif +#if PT_UPDATE_DEBUG > 0 + DEBUG_allow_pt_reads(); +#endif + HYPERVISOR_pt_update(update_queue, idx); + idx = 0; +} + +void _flush_page_update_queue(void) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + if ( idx != 0 ) __flush_page_update_queue(); + spin_unlock_irqrestore(&update_lock, flags); +} + +static inline void increment_index(void) +{ + idx++; + if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue(); +} + +void queue_l1_entry_update(unsigned long ptr, unsigned long val) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); +#if PT_UPDATE_DEBUG > 0 + DEBUG_disallow_pt_read(ptr); +#endif + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_l2_entry_update(unsigned long ptr, unsigned long val) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pt_switch(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_NEW_BASEPTR; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + 
+void queue_tlb_flush(void) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_TLB_FLUSH; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_invlpg(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = ptr & PAGE_MASK; + update_queue[idx].val |= PGEXT_INVLPG; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pgd_pin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_PIN_L2_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pgd_unpin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_UNPIN_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pte_pin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_PIN_L1_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pte_unpin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= PGREQ_EXTENDED_COMMAND; + update_queue[idx].val = PGEXT_UNPIN_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_set_ldt(unsigned long ptr, unsigned long len) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, 
flags); + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND | ptr; + update_queue[idx].val = PGEXT_SET_LDT | (len << PGEXT_CMD_SHIFT); + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} diff --git a/xenolinux-2.4.22-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.22-sparse/arch/xeno/mm/init.c new file mode 100644 index 0000000000..acce1fbfd7 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/mm/init.c @@ -0,0 +1,394 @@ +/* + * linux/arch/i386/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/init.h> +#ifdef CONFIG_BLK_DEV_INITRD +#include <linux/blk.h> +#endif +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/bootmem.h> +#include <linux/slab.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/dma.h> +#include <asm/apic.h> +#include <asm/tlb.h> + +mmu_gather_t mmu_gathers[NR_CPUS]; +unsigned long highstart_pfn, highend_pfn; +static unsigned long totalram_pages; +static unsigned long totalhigh_pages; + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + if(pgtable_cache_size > high) { + do { + if (!QUICKLIST_EMPTY(pgd_quicklist)) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (!QUICKLIST_EMPTY(pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } + } while(pgtable_cache_size > low); + } + return freed; +} + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: 
%6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageHighMem(mem_map+i)) + highmem++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); +} + +/* References to section boundaries */ + +extern char _text, _etext, _edata, __bss_start, _end; +extern char __init_begin, __init_end; + +static inline void set_pte_phys (unsigned long vaddr, + unsigned long phys, pgprot_t flags) +{ + pgprot_t prot; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = init_mm.pgd + __pgd_offset(vaddr); + if (pgd_none(*pgd)) { + printk("PAE BUG #00!\n"); + return; + } + pmd = pmd_offset(pgd, vaddr); + if (pmd_none(*pmd)) { + printk("PAE BUG #01!\n"); + return; + } + pte = pte_offset(pmd, vaddr); + + if (pte_val(*pte)) + pte_ERROR(*pte); + + pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags); + + /* We queue directly, avoiding hidden phys->machine translation. */ + queue_l1_entry_update(__pa(pte), phys | pgprot_val(prot)); + + /* + * It's enough to flush this one mapping. 
+ * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +void __set_fixmap (enum fixed_addresses idx, unsigned long phys, + pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid __set_fixmap\n"); + return; + } + set_pte_phys(address, phys, flags); +} + +static void __init fixrange_init (unsigned long start, + unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd, *kpgd; + pmd_t *pmd, *kpmd; + pte_t *pte, *kpte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { +#if CONFIG_X86_PAE + if (pgd_none(*pgd)) { + pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); + if (pmd != pmd_offset(pgd, 0)) + printk("PAE BUG #02!\n"); + } + pmd = pmd_offset(pgd, vaddr); +#else + pmd = (pmd_t *)pgd; +#endif + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + clear_page(pte); + kpgd = pgd_offset_k((unsigned long)pte); + kpmd = pmd_offset(kpgd, (unsigned long)pte); + kpte = pte_offset(kpmd, (unsigned long)pte); + queue_l1_entry_update(__pa(kpte), + (*(unsigned long *)kpte)&~_PAGE_RW); + + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); + + // XXX Xen below reqd ? 
+ //if (pte != pte_offset(pmd, 0)) + // BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } + + XENO_flush_page_update_queue(); +} + + +static void __init zone_sizes_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned int max_dma, high, low; + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - low; +#endif + } + free_area_init(zones_size); +} + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routine also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + unsigned long vaddr; + + zone_sizes_init(); + + /* + * Fixed mappings, only the page table structure has to be created - + * mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); + + /* Cheesy: this can probably be moved to the blkdev driver. 
*/ + set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring); + +#ifdef CONFIG_HIGHMEM +#error + kmap_init(); +#endif +} + +static inline int page_is_ram (unsigned long pagenr) +{ + return 1; +} + +#ifdef CONFIG_HIGHMEM +void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +{ + if (!page_is_ram(pfn)) { + SetPageReserved(page); + return; + } + + if (bad_ppro && page_kills_ppro(pfn)) { + SetPageReserved(page); + return; + } + + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + totalhigh_pages++; +} +#endif /* CONFIG_HIGHMEM */ + +static void __init set_max_mapnr_init(void) +{ +#ifdef CONFIG_HIGHMEM + highmem_start_page = mem_map + highstart_pfn; + max_mapnr = num_physpages = highend_pfn; + num_mappedpages = max_low_pfn; +#else + max_mapnr = num_mappedpages = num_physpages = max_low_pfn; +#endif +} + +static int __init free_pages_init(void) +{ +#ifdef CONFIG_HIGHMEM +#error Where is this supposed to be initialised? 
+ int bad_ppro; +#endif + int reservedpages, pfn; + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + for (pfn = 0; pfn < max_low_pfn; pfn++) { + /* + * Only count reserved RAM pages + */ + if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) + reservedpages++; + } +#ifdef CONFIG_HIGHMEM + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) + one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); + totalram_pages += totalhigh_pages; +#endif + return reservedpages; +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + + if (!mem_map) + BUG(); + + set_max_mapnr_init(); + + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + reservedpages = free_pages_init(); + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + + boot_cpu_data.wp_works_ok = 1; +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)(&__init_begin); + for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk (KERN_INFO "Freeing 
initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; + return; +} + +#if defined(CONFIG_X86_PAE) +struct kmem_cache_s *pae_pgd_cachep; +void __init pgtable_cache_init(void) +{ + /* + * PAE pgds must be 16-byte aligned: + */ + pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); + if (!pae_pgd_cachep) + panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); +} +#endif /* CONFIG_X86_PAE */ diff --git a/xenolinux-2.4.22-sparse/arch/xeno/mm/ioremap.c b/xenolinux-2.4.22-sparse/arch/xeno/mm/ioremap.c new file mode 100644 index 0000000000..d537956105 --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/mm/ioremap.c @@ -0,0 +1,227 @@ +/* + * arch/xeno/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. 
+ * + * (C) Copyright 1995 1996 Linus Torvalds + * + * Modifications for Xenolinux (c) 2003 Keir Fraser + */ + +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/vmalloc.h> +#include <asm/io.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/mmu.h> + +#if defined(CONFIG_XENO_PRIV) + +#define direct_set_pte(pteptr, pteval) \ + queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, (pteval).pte_low) +#define __direct_pte(x) ((pte_t) { (x) } ) +#define __direct_mk_pte(page_nr,pgprot) \ + __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define direct_mk_pte_phys(physpage, pgprot) \ + __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) + + +static inline void direct_remap_area_pte(pte_t *pte, + unsigned long address, + unsigned long size, + unsigned long machine_addr, + pgprot_t prot) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + do { + if (!pte_none(*pte)) { + printk("direct_remap_area_pte: page already exists\n"); + BUG(); + } + direct_set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot))); + address += PAGE_SIZE; + machine_addr += PAGE_SIZE; + pte++; + } while (address && (address < end)); +} + +static inline int direct_remap_area_pmd(struct mm_struct *mm, + pmd_t *pmd, + unsigned long address, + unsigned long size, + unsigned long machine_addr, + pgprot_t prot) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + machine_addr -= address; + if (address >= end) + BUG(); + do { + pte_t * pte = pte_alloc(mm, pmd, address); + if (!pte) + return -ENOMEM; + direct_remap_area_pte(pte, address, end - address, + address + machine_addr, prot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +int direct_remap_area_pages(struct mm_struct *mm, + 
unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long end = address + size; + + machine_addr -= address; + dir = pgd_offset(mm, address); + flush_cache_all(); + if (address >= end) + BUG(); + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, address); + error = -ENOMEM; + if (!pmd) + break; + error = direct_remap_area_pmd(mm, pmd, address, end - address, + machine_addr + address, prot); + if (error) + break; + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + spin_unlock(&mm->page_table_lock); + flush_tlb_all(); + return error; +} + +#endif /* CONFIG_XENO_PRIV */ + + +/* + * Remap an arbitrary machine address space into the kernel virtual + * address space. Needed when a privileged instance of Xenolinux wants + * to access space outside its world directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void * __ioremap(unsigned long machine_addr, + unsigned long size, + unsigned long flags) +{ +#if defined(CONFIG_XENO_PRIV) + void * addr; + struct vm_struct * area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* Only privileged Xenolinux can make unchecked pagetable updates. 
*/ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return NULL; + + /* Don't allow wraparound or zero size */ + last_addr = machine_addr + size - 1; + if (!size || last_addr < machine_addr) + return NULL; + + /* Mappings have to be page-aligned */ + offset = machine_addr & ~PAGE_MASK; + machine_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - machine_addr; + + /* Ok, go for it */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + addr = area->addr; + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | + _PAGE_ACCESSED | flags); + if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), + machine_addr, size, prot)) { + vfree(addr); + return NULL; + } + return (void *) (offset + (char *)addr); +#else + return NULL; +#endif +} + +void iounmap(void *addr) +{ + vfree((void *)((unsigned long)addr & PAGE_MASK)); +} + +/* implementation of boot time ioremap for purpose of providing access +to the vga console for privileged domains. Unlike boot time ioremap on +other architectures, ours is permanent and not reclaimed when the vmalloc +infrastructure is started */ + +void __init *bt_ioremap(unsigned long machine_addr, unsigned long size) +{ + unsigned long offset, last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + + /* Don't allow wraparound or zero size */ + last_addr = machine_addr + size - 1; + if (!size || last_addr < machine_addr) + return NULL; + + /* + * Mappings have to be page-aligned + */ + offset = machine_addr & ~PAGE_MASK; + machine_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - machine_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (nrpages > NR_FIX_BTMAPS) + return NULL; + + /* + * Ok, go for it.. 
+ */ + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + set_fixmap(idx, machine_addr); + machine_addr += PAGE_SIZE; + --idx; + --nrpages; + } + + flush_tlb_all(); + + return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); +} + + +#if 0 /* We don't support these functions. They shouldn't be required. */ +void __init bt_iounmap(void *addr, unsigned long size) {} +#endif diff --git a/xenolinux-2.4.22-sparse/arch/xeno/vmlinux.lds b/xenolinux-2.4.22-sparse/arch/xeno/vmlinux.lds new file mode 100644 index 0000000000..7c4c4f8e9c --- /dev/null +++ b/xenolinux-2.4.22-sparse/arch/xeno/vmlinux.lds @@ -0,0 +1,82 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0xC0000000 + 0x000000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . 
= ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} |