aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.rootkeys5
-rw-r--r--BitKeeper/etc/ignore2
-rw-r--r--README62
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_644
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_644
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile58
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S629
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c143
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S57
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S41
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c22
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c3
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S38
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c1
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c4
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h32
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h16
-rw-r--r--tools/ioemu/exec.c3
-rw-r--r--tools/libxc/xc.h1
-rw-r--r--tools/libxc/xc_domain.c10
-rw-r--r--tools/libxc/xc_evtchn.c2
-rw-r--r--tools/libxc/xc_gnttab.c2
-rw-r--r--tools/libxc/xc_linux_build.c2
-rw-r--r--tools/libxc/xc_misc.c2
-rw-r--r--tools/libxc/xc_private.c14
-rw-r--r--tools/libxc/xc_private.h46
-rw-r--r--tools/libxc/xc_ptrace.c11
-rw-r--r--tools/policy/policy_tool.c23
-rw-r--r--tools/python/xen/xm/create.py3
-rw-r--r--xen/arch/x86/dom0_ops.c5
-rw-r--r--xen/arch/x86/mm.c6
-rw-r--r--xen/arch/x86/setup.c3
-rw-r--r--xen/arch/x86/vmx.c48
-rw-r--r--xen/arch/x86/vmx_intercept.c12
-rw-r--r--xen/arch/x86/vmx_io.c148
-rw-r--r--xen/arch/x86/vmx_platform.c291
-rw-r--r--xen/include/asm-x86/ldt.h4
-rw-r--r--xen/include/asm-x86/page.h4
-rw-r--r--xen/include/asm-x86/vmx_platform.h1
-rw-r--r--xen/include/public/arch-x86_32.h2
40 files changed, 1511 insertions, 253 deletions
diff --git a/.rootkeys b/.rootkeys
index 29ea9db08f..be88036027 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -263,6 +263,10 @@
3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
424efaa6xahU2q85_dT-SjUJEaivfg linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig
424efaa6kKleWe45IrqsG8gkejgEQA linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile
+42ba7fc4lb16rk9fVJeOBTPpy7bHFQ linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile
+42ba7fc4URo9Q9gyOqemf4cCcAfYCg linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
+42ba7fc4qPAX8G1HhVUPmps8xw_Odw linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
+42ba7fc4xqwiER7U_2NvNwqxhRMo1g linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S
424efaa6HSyuVodl6SxFGj39vlp6MA linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
428f0973_moB26LYt56xXKYCTqEdXw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile
428f0973NBdgINmWOEJjoIDL73SDbQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c
@@ -289,6 +293,7 @@
424efaa7szEu90xkjpXk5TufZxxa4g linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
424efaa6sJsuHdGIGxm0r-ugsss3OQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
424efaa6xbX9LkKyaXvgbL9s_39Trw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
+42ba7fc4mg4zVSdJUO5Wc9PM-KUbqg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
424efaa670zlQTtnOYK_aNgqhmSx-Q linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile
424efaa6HUC68-hBHgiWOMDfKZogIA linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
424efaa65ELRJ3JfgQQKLzW6y0ECYQ linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index a6d5a390a0..e8c82227fe 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -124,6 +124,8 @@ tools/misc/miniterm/miniterm
tools/misc/xc_shadow
tools/misc/xen_cpuperf
tools/misc/xenperf
+tools/policy/policy_tool
+tools/policy/xen/*
tools/pygrub/build/*
tools/python/build/*
tools/tests/test_x86_emulator
diff --git a/README b/README
index ee395e42d9..695dc2926f 100644
--- a/README
+++ b/README
@@ -40,7 +40,9 @@ Quick-Start Guide - Pre-Built Binary Release
performed with root privileges.]
1. Install the binary distribution onto your filesystem:
+
# sh ./install.sh
+
Amongst other things, this will install Xen and XenLinux kernel
files in /boot, kernel modules and Python packages in /lib, and
various control tools in standard 'bin' directories.
@@ -50,9 +52,11 @@ performed with root privileges.]
alternatives such as LILO are *not* supported. You can most likely
find your GRUB menu file at /boot/grub/menu.lst: edit this file to
include an entry like the following:
+
# title Xen 2.0 / XenLinux 2.6
# kernel /boot/xen-2.0.gz dom0_mem=<mem-kb> console=vga
# module /boot/vmlinuz-2.6-xen0 root=<root-dev> ro console=tty0
+
For <mem-kb> you should specify the amount of memory, in kilobytes,
to allocate for use by your initial XenLinux virtual machine. Note
that Xen itself reserves about 32MB memory for internal use, which
@@ -79,3 +83,61 @@ provided by your Linux distributor:
[NB. Unless noted otherwise, all the following steps should be
performed with root privileges.]
+
+1. Download and untar the source tarball file. This will be a
+ file named xen-unstable-src.tgz, or xen-$version-src.tgz.
+ You can also pull the current version from the SCMS
+   that is being used (BitKeeper, scheduled to change shortly).
+
+ # tar xzf xen-unstable-src.tgz
+
+ Assuming you are using the unstable tree, this will
+ untar into xen-unstable. The rest of the instructions
+ use the unstable tree as an example, substitute the
+ version for unstable.
+
+2. cd to xen-unstable (or whatever you sensibly rename it to).
+ The Linux (2.4 and 2.6), netbsd and freebsd kernel source
+ trees are in the $os-$version-xen-sparse directories.
+
+On Linux:
+
+3. For the very first build, or if you want to destroy existing
+ .configs and build trees, perform the following steps:
+
+ # make world
+ # make install
+
+ This will create the directories linux-2.6.11-dom0/ and
+ linux-2.6.11-domU/ after first cleaning everything. It will
+ create and install into the dist/ directory which is the
+ default install location. It will build the xen binary
+   (xen.gz), the boot images for dom0 and an unprivileged
+ guest kernel (vmlinuz-2.6.11-xen0 and vmlinuz-2.6.11-xenU
+ respectively), the tools and the documentation.
+
+4. To rebuild an existing tree without modifying the config:
+ # make dist
+
+ This will build and install xen, kernels, tools, and
+ docs into the local dist/ directory.
+
+5. To rebuild a kernel with a modified config:
+
+ # cd linux-2.6.11-xen0 # or linux-2.6.11-xenU
+ # make ARCH=xen menuconfig # or xconfig
+ # cd ..
+ # make dist
+ # make install
+
+ You can copy your own config into linux-2.6.11-xen0 first.
+ Alternatively, you can also copy your config file to
+ dist/install/boot/config-$version-xen0/U. This is picked up
+ when a make dist is done. Include the ARCH=xen directive for
+ all make commands when building the kernels.
+
+6. To see a full list of targets and a brief description, type:
+ # make help
+
+7. Edit your grub.conf file as described above to have an
+ appropriate entry for your new kernel.
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
index 7df45e168c..745fd653db 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
@@ -133,8 +133,8 @@ CONFIG_DUMMY_IOMMU=y
#
# Executable file formats / Emulations
#
-# CONFIG_IA32_EMULATION is not set
-
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
#
# Executable file formats
#
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
index e28efc0d5b..de7cc84038 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
@@ -128,8 +128,8 @@ CONFIG_DUMMY_IOMMU=y
#
# Executable file formats / Emulations
#
-# CONFIG_IA32_EMULATION is not set
-
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
#
# Executable file formats
#
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile
new file mode 100644
index 0000000000..4f30b40334
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile
@@ -0,0 +1,58 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+XENARCH := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/kernel
+
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o syscall32.o
+
+c-obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_ioctl.o \
+ ia32_signal.o tls32.o \
+ ia32_binfmt.o fpu32.o ptrace32.o
+
+s-obj-y :=
+
+sysv-$(CONFIG_SYSVIPC) := ipc32.o
+c-obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
+
+c-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
+
+$(obj)/syscall32.o: $(src)/syscall32.c \
+ $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so)
+
+# Teach kbuild about targets
+targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+
+# The DSO images are built using a special linker script
+quiet_cmd_syscall = SYSCALL $@
+ cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
+ -Wl,-soname=linux-gate.so.1 -o $@ \
+ -Wl,-T,$(filter-out FORCE,$^)
+
+
+$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(src)/vsyscall-%.o FORCE
+ $(call if_changed,syscall)
+
+AFLAGS_vsyscall-int80.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32
+AFLAGS_vsyscall-syscall.o = -m32
+CFLAGS_ia32_ioctl.o += -Ifs/
+
+s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o
+
+$(src)/vsyscall.lds:
+ @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
+ @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
+
+$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
+
+obj-y += $(c-obj-y) $(s-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
new file mode 100644
index 0000000000..521c881c98
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
@@ -0,0 +1,629 @@
+/*
+ * Compatibility mode system call entry point for x86-64.
+ *
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */
+
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/current.h>
+#include <asm/errno.h>
+#include <asm/ia32_unistd.h>
+#include <asm/thread_info.h>
+#include <asm/segment.h>
+#include <asm/vsyscall32.h>
+#include <linux/linkage.h>
+
+#define __XEN_X86_64 1
+
+ .macro IA32_ARG_FIXUP noebp=0
+ movl %edi,%r8d
+ .if \noebp
+ .else
+ movl %ebp,%r9d
+ .endif
+ xchg %ecx,%esi
+ movl %ebx,%edi
+ movl %edx,%edx /* zero extension */
+ .endm
+
+ /* clobbers %eax */
+ .macro CLEAR_RREGS
+ xorl %eax,%eax
+ movq %rax,R11(%rsp)
+ movq %rax,R10(%rsp)
+ movq %rax,R9(%rsp)
+ movq %rax,R8(%rsp)
+ .endm
+
+#if defined (__XEN_X86_64)
+#include "../kernel/xen_entry.S"
+
+#define __swapgs
+#define __cli
+#define __sti
+#else
+/*
+ * Use the native instructions
+ */
+#define __swapgs swapgs
+#define __cli cli
+#define __sti sti
+#endif
+
+/*
+ * 32bit SYSENTER instruction entry.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp user stack
+ * 0(%ebp) Arg6
+ *
+ * Interrupts off.
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(ia32_sysenter_target)
+ CFI_STARTPROC
+ __swapgs
+ movq %gs:pda_kernelstack, %rsp
+ addq $(PDA_STACKOFFSET),%rsp
+ XEN_UNBLOCK_EVENTS(%r11)
+ __sti
+ movl %ebp,%ebp /* zero extension */
+ pushq $__USER32_DS
+ pushq %rbp
+ pushfq
+ movl $VSYSCALL32_SYSEXIT, %r10d
+ pushq $__USER32_CS
+ movl %eax, %eax
+ pushq %r10
+ pushq %rax
+ cld
+ SAVE_ARGS 0,0,1
+ /* no need to do an access_ok check here because rbp has been
+ 32bit zero extended */
+1: movl (%rbp),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ jnz sysenter_tracesys
+sysenter_do_call:
+ cmpl $(IA32_NR_syscalls),%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP 1
+ call *ia32_sys_call_table(,%rax,8)
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ XEN_BLOCK_EVENTS(%r11)
+ __cli
+ testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ /* clear IF, that popfq doesn't enable interrupts early */
+ andl $~0x200,EFLAGS-R11(%rsp)
+ RESTORE_ARGS 1,24,1,1,1,1
+ popfq
+ popq %rcx /* User %esp */
+ movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
+ __swapgs
+ XEN_UNBLOCK_EVENTS(%r11)
+ __sti /* sti only takes effect after the next instruction */
+ /* sysexit */
+ .byte 0xf, 0x35 /* TBD */
+
+sysenter_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ movl %ebp, %ebp
+ /* no need to do an access_ok check here because rbp has been
+ 32bit zero extended */
+1: movl (%rbp),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ jmp sysenter_do_call
+ CFI_ENDPROC
+
+/*
+ * 32bit SYSCALL instruction entry.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx return EIP
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
+ * %esp user stack
+ * 0(%esp) Arg6
+ *
+ * Interrupts off.
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(ia32_cstar_target)
+ CFI_STARTPROC
+ __swapgs
+ movl %esp,%r8d
+ movq %gs:pda_kernelstack,%rsp
+ XEN_UNBLOCK_EVENTS(%r11)
+ __sti
+ SAVE_ARGS 8,1,1
+ movl %eax,%eax /* zero extension */
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rcx,RIP-ARGOFFSET(%rsp)
+ movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+ movl %ebp,%ecx
+ movq $__USER32_CS,CS-ARGOFFSET(%rsp)
+ movq $__USER32_DS,SS-ARGOFFSET(%rsp)
+ movq %r11,EFLAGS-ARGOFFSET(%rsp)
+ movq %r8,RSP-ARGOFFSET(%rsp)
+ /* no need to do an access_ok check here because r8 has been
+ 32bit zero extended */
+ /* hardware stack frame is complete now */
+1: movl (%r8),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ jnz cstar_tracesys
+cstar_do_call:
+ cmpl $IA32_NR_syscalls,%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP 1
+ call *ia32_sys_call_table(,%rax,8)
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ XEN_BLOCK_EVENTS(%r11)
+ __cli
+ testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ RESTORE_ARGS 1,-ARG_SKIP,1,1,1
+ movl RIP-ARGOFFSET(%rsp),%ecx
+ movl EFLAGS-ARGOFFSET(%rsp),%r11d
+ movl RSP-ARGOFFSET(%rsp),%esp
+ __swapgs
+ sysretl /* TBD */
+
+cstar_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ movl RSP-ARGOFFSET(%rsp), %r8d
+ /* no need to do an access_ok check here because r8 has been
+ 32bit zero extended */
+1: movl (%r8),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ jmp cstar_do_call
+
+ia32_badarg:
+ movq $-EFAULT,%rax
+ jmp ia32_sysret
+ CFI_ENDPROC
+
+/*
+ * Emulated IA32 system calls via int 0x80.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
+ *
+ * Notes:
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except %eax must be saved (but ptrace may violate that)
+ * Arguments are zero extended. For system calls that want sign extension and
+ * take long arguments a wrapper is needed. Most calls can just be called
+ * directly.
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
+
+ENTRY(ia32_syscall)
+ CFI_STARTPROC
+ __swapgs
+ XEN_UNBLOCK_EVENTS(%r11)
+ __sti
+ movq (%rsp),%rcx
+ movq 8(%rsp),%r11
+ addq $0x10,%rsp /* skip rcx and r11 */
+ movl %eax,%eax
+ pushq %rax
+ cld
+/* 1: jmp 1b */
+ /* note the registers are not zero extended to the sf.
+ this could be a problem. */
+ SAVE_ARGS 0,0,1
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ jnz ia32_tracesys
+ia32_do_syscall:
+ cmpl $(IA32_NR_syscalls),%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP
+ call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
+ia32_sysret:
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ jmp int_ret_from_sys_call
+
+ia32_tracesys:
+ SAVE_REST
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ jmp ia32_do_syscall
+
+ia32_badsys:
+ movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+ movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+ jmp int_ret_from_sys_call
+
+ni_syscall:
+ movq %rax,%rdi
+ jmp sys32_ni_syscall
+
+quiet_ni_syscall:
+ movq $-ENOSYS,%rax
+ ret
+ CFI_ENDPROC
+
+ .macro PTREGSCALL label, func, arg
+ .globl \label
+\label:
+ leaq \func(%rip),%rax
+ leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
+ jmp ia32_ptregs_common
+ .endm
+
+ PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
+ PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
+ PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
+ PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
+ PTREGSCALL stub32_execve, sys32_execve, %rcx
+ PTREGSCALL stub32_fork, sys_fork, %rdi
+ PTREGSCALL stub32_clone, sys32_clone, %rdx
+ PTREGSCALL stub32_vfork, sys_vfork, %rdi
+ PTREGSCALL stub32_iopl, sys_iopl, %rsi
+ PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
+
+ENTRY(ia32_ptregs_common)
+ CFI_STARTPROC
+ popq %r11
+ SAVE_REST
+ call *%rax
+ RESTORE_REST
+ jmp ia32_sysret /* misbalances the return cache */
+ CFI_ENDPROC
+
+ .data
+ .align 8
+ .globl ia32_sys_call_table
+ia32_sys_call_table:
+ .quad sys_restart_syscall
+ .quad sys_exit
+ .quad stub32_fork
+ .quad sys_read
+ .quad sys_write
+ .quad sys32_open /* 5 */
+ .quad sys_close
+ .quad sys32_waitpid
+ .quad sys_creat
+ .quad sys_link
+ .quad sys_unlink /* 10 */
+ .quad stub32_execve
+ .quad sys_chdir
+ .quad compat_sys_time
+ .quad sys_mknod
+ .quad sys_chmod /* 15 */
+ .quad sys_lchown16
+ .quad quiet_ni_syscall /* old break syscall holder */
+ .quad sys_stat
+ .quad sys32_lseek
+ .quad sys_getpid /* 20 */
+ .quad compat_sys_mount /* mount */
+ .quad sys_oldumount /* old_umount */
+ .quad sys_setuid16
+ .quad sys_getuid16
+ .quad compat_sys_stime /* stime */ /* 25 */
+ .quad sys32_ptrace /* ptrace */
+ .quad sys_alarm
+ .quad sys_fstat /* (old)fstat */
+ .quad sys_pause
+ .quad compat_sys_utime /* 30 */
+ .quad quiet_ni_syscall /* old stty syscall holder */
+ .quad quiet_ni_syscall /* old gtty syscall holder */
+ .quad sys_access
+ .quad sys_nice
+ .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
+ .quad sys_sync
+ .quad sys32_kill
+ .quad sys_rename
+ .quad sys_mkdir
+ .quad sys_rmdir /* 40 */
+ .quad sys_dup
+ .quad sys32_pipe
+ .quad compat_sys_times
+ .quad quiet_ni_syscall /* old prof syscall holder */
+ .quad sys_brk /* 45 */
+ .quad sys_setgid16
+ .quad sys_getgid16
+ .quad sys_signal
+ .quad sys_geteuid16
+ .quad sys_getegid16 /* 50 */
+ .quad sys_acct
+ .quad sys_umount /* new_umount */
+ .quad quiet_ni_syscall /* old lock syscall holder */
+ .quad compat_sys_ioctl
+ .quad compat_sys_fcntl64 /* 55 */
+ .quad quiet_ni_syscall /* old mpx syscall holder */
+ .quad sys_setpgid
+ .quad quiet_ni_syscall /* old ulimit syscall holder */
+ .quad sys32_olduname
+ .quad sys_umask /* 60 */
+ .quad sys_chroot
+ .quad sys32_ustat
+ .quad sys_dup2
+ .quad sys_getppid
+ .quad sys_getpgrp /* 65 */
+ .quad sys_setsid
+ .quad sys32_sigaction
+ .quad sys_sgetmask
+ .quad sys_ssetmask
+ .quad sys_setreuid16 /* 70 */
+ .quad sys_setregid16
+ .quad stub32_sigsuspend
+ .quad compat_sys_sigpending
+ .quad sys_sethostname
+ .quad compat_sys_setrlimit /* 75 */
+ .quad compat_sys_old_getrlimit /* old_getrlimit */
+ .quad compat_sys_getrusage
+ .quad sys32_gettimeofday
+ .quad sys32_settimeofday
+ .quad sys_getgroups16 /* 80 */
+ .quad sys_setgroups16
+ .quad sys32_old_select
+ .quad sys_symlink
+ .quad sys_lstat
+ .quad sys_readlink /* 85 */
+#ifdef CONFIG_IA32_AOUT
+ .quad sys_uselib
+#else
+ .quad quiet_ni_syscall
+#endif
+ .quad sys_swapon
+ .quad sys_reboot
+ .quad compat_sys_old_readdir
+ .quad sys32_mmap /* 90 */
+ .quad sys_munmap
+ .quad sys_truncate
+ .quad sys_ftruncate
+ .quad sys_fchmod
+ .quad sys_fchown16 /* 95 */
+ .quad sys_getpriority
+ .quad sys_setpriority
+ .quad quiet_ni_syscall /* old profil syscall holder */
+ .quad compat_sys_statfs
+ .quad compat_sys_fstatfs /* 100 */
+ .quad sys_ioperm
+ .quad compat_sys_socketcall
+ .quad sys_syslog
+ .quad compat_sys_setitimer
+ .quad compat_sys_getitimer /* 105 */
+ .quad compat_sys_newstat
+ .quad compat_sys_newlstat
+ .quad compat_sys_newfstat
+ .quad sys32_uname
+ .quad stub32_iopl /* 110 */
+ .quad sys_vhangup
+ .quad quiet_ni_syscall /* old "idle" system call */
+ .quad sys32_vm86_warning /* vm86old */
+ .quad compat_sys_wait4
+ .quad sys_swapoff /* 115 */
+ .quad sys32_sysinfo
+ .quad sys32_ipc
+ .quad sys_fsync
+ .quad stub32_sigreturn
+ .quad stub32_clone /* 120 */
+ .quad sys_setdomainname
+ .quad sys_uname
+ .quad sys_modify_ldt
+ .quad sys32_adjtimex
+ .quad sys32_mprotect /* 125 */
+ .quad compat_sys_sigprocmask
+ .quad quiet_ni_syscall /* create_module */
+ .quad sys_init_module
+ .quad sys_delete_module
+ .quad quiet_ni_syscall /* 130 get_kernel_syms */
+ .quad sys_quotactl
+ .quad sys_getpgid
+ .quad sys_fchdir
+ .quad quiet_ni_syscall /* bdflush */
+ .quad sys_sysfs /* 135 */
+ .quad sys_personality
+ .quad quiet_ni_syscall /* for afs_syscall */
+ .quad sys_setfsuid16
+ .quad sys_setfsgid16
+ .quad sys_llseek /* 140 */
+ .quad compat_sys_getdents
+ .quad compat_sys_select
+ .quad sys_flock
+ .quad sys_msync
+ .quad compat_sys_readv /* 145 */
+ .quad compat_sys_writev
+ .quad sys_getsid
+ .quad sys_fdatasync
+ .quad sys32_sysctl /* sysctl */
+ .quad sys_mlock /* 150 */
+ .quad sys_munlock
+ .quad sys_mlockall
+ .quad sys_munlockall
+ .quad sys_sched_setparam
+ .quad sys_sched_getparam /* 155 */
+ .quad sys_sched_setscheduler
+ .quad sys_sched_getscheduler
+ .quad sys_sched_yield
+ .quad sys_sched_get_priority_max
+ .quad sys_sched_get_priority_min /* 160 */
+ .quad sys_sched_rr_get_interval
+ .quad compat_sys_nanosleep
+ .quad sys_mremap
+ .quad sys_setresuid16
+ .quad sys_getresuid16 /* 165 */
+ .quad sys32_vm86_warning /* vm86 */
+ .quad quiet_ni_syscall /* query_module */
+ .quad sys_poll
+ .quad compat_sys_nfsservctl
+ .quad sys_setresgid16 /* 170 */
+ .quad sys_getresgid16
+ .quad sys_prctl
+ .quad stub32_rt_sigreturn
+ .quad sys32_rt_sigaction
+ .quad sys32_rt_sigprocmask /* 175 */
+ .quad sys32_rt_sigpending
+ .quad compat_sys_rt_sigtimedwait
+ .quad sys32_rt_sigqueueinfo
+ .quad stub32_rt_sigsuspend
+ .quad sys32_pread /* 180 */
+ .quad sys32_pwrite
+ .quad sys_chown16
+ .quad sys_getcwd
+ .quad sys_capget
+ .quad sys_capset
+ .quad stub32_sigaltstack
+ .quad sys32_sendfile
+ .quad quiet_ni_syscall /* streams1 */
+ .quad quiet_ni_syscall /* streams2 */
+ .quad stub32_vfork /* 190 */
+ .quad compat_sys_getrlimit
+ .quad sys32_mmap2
+ .quad sys32_truncate64
+ .quad sys32_ftruncate64
+ .quad sys32_stat64 /* 195 */
+ .quad sys32_lstat64
+ .quad sys32_fstat64
+ .quad sys_lchown
+ .quad sys_getuid
+ .quad sys_getgid /* 200 */
+ .quad sys_geteuid
+ .quad sys_getegid
+ .quad sys_setreuid
+ .quad sys_setregid
+ .quad sys_getgroups /* 205 */
+ .quad sys_setgroups
+ .quad sys_fchown
+ .quad sys_setresuid
+ .quad sys_getresuid
+ .quad sys_setresgid /* 210 */
+ .quad sys_getresgid
+ .quad sys_chown
+ .quad sys_setuid
+ .quad sys_setgid
+ .quad sys_setfsuid /* 215 */
+ .quad sys_setfsgid
+ .quad sys_pivot_root
+ .quad sys_mincore
+ .quad sys_madvise
+ .quad compat_sys_getdents64 /* 220 getdents64 */
+ .quad compat_sys_fcntl64
+ .quad quiet_ni_syscall /* tux */
+ .quad quiet_ni_syscall /* security */
+ .quad sys_gettid
+ .quad sys_readahead /* 225 */
+ .quad sys_setxattr
+ .quad sys_lsetxattr
+ .quad sys_fsetxattr
+ .quad sys_getxattr
+ .quad sys_lgetxattr /* 230 */
+ .quad sys_fgetxattr
+ .quad sys_listxattr
+ .quad sys_llistxattr
+ .quad sys_flistxattr
+ .quad sys_removexattr /* 235 */
+ .quad sys_lremovexattr
+ .quad sys_fremovexattr
+ .quad sys_tkill
+ .quad sys_sendfile64
+ .quad compat_sys_futex /* 240 */
+ .quad compat_sys_sched_setaffinity
+ .quad compat_sys_sched_getaffinity
+ .quad sys32_set_thread_area
+ .quad sys32_get_thread_area
+ .quad compat_sys_io_setup /* 245 */
+ .quad sys_io_destroy
+ .quad compat_sys_io_getevents
+ .quad compat_sys_io_submit
+ .quad sys_io_cancel
+ .quad sys_fadvise64 /* 250 */
+ .quad quiet_ni_syscall /* free_huge_pages */
+ .quad sys_exit_group
+ .quad sys32_lookup_dcookie
+ .quad sys_epoll_create
+ .quad sys_epoll_ctl /* 255 */
+ .quad sys_epoll_wait
+ .quad sys_remap_file_pages
+ .quad sys_set_tid_address
+ .quad sys32_timer_create
+ .quad compat_sys_timer_settime /* 260 */
+ .quad compat_sys_timer_gettime
+ .quad sys_timer_getoverrun
+ .quad sys_timer_delete
+ .quad compat_sys_clock_settime
+ .quad compat_sys_clock_gettime /* 265 */
+ .quad compat_sys_clock_getres
+ .quad compat_sys_clock_nanosleep
+ .quad compat_sys_statfs64
+ .quad compat_sys_fstatfs64
+ .quad sys_tgkill /* 270 */
+ .quad compat_sys_utimes
+ .quad sys32_fadvise64_64
+ .quad quiet_ni_syscall /* sys_vserver */
+ .quad sys_mbind
+ .quad compat_sys_get_mempolicy /* 275 */
+ .quad sys_set_mempolicy
+ .quad compat_sys_mq_open
+ .quad sys_mq_unlink
+ .quad compat_sys_mq_timedsend
+ .quad compat_sys_mq_timedreceive /* 280 */
+ .quad compat_sys_mq_notify
+ .quad compat_sys_mq_getsetattr
+ .quad quiet_ni_syscall /* reserved for kexec */
+ .quad sys32_waitid
+ .quad quiet_ni_syscall /* sys_altroot */
+ .quad sys_add_key
+ .quad sys_request_key
+ .quad sys_keyctl
+ /* don't forget to change IA32_NR_syscalls */
+ia32_syscall_end:
+ .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
+ .quad ni_syscall
+ .endr
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
new file mode 100644
index 0000000000..0f97d1cf54
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
@@ -0,0 +1,143 @@
+/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
+
+/* vsyscall handling for 32bit processes. Map a stub page into it
+ on demand because 32bit cannot reach the kernel's fixmaps */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/stringify.h>
+#include <asm/proto.h>
+#include <asm/tlbflush.h>
+#include <asm/ia32_unistd.h>
+
+#define USE_INT80
+
+#ifdef USE_INT80
+/* 32bit VDSOs mapped into user space. */
+asm(".section \".init.data\",\"aw\"\n"
+ "syscall32_int80:\n"
+ ".incbin \"arch/xen/x86_64/ia32/vsyscall-int80.so\"\n"
+ "syscall32_int80_end:\n"
+ "syscall32_syscall:\n"
+ ".incbin \"arch/xen/x86_64/ia32/vsyscall-syscall.so\"\n"
+ "syscall32_syscall_end:\n"
+ "syscall32_sysenter:\n"
+ ".incbin \"arch/xen/x86_64/ia32/vsyscall-sysenter.so\"\n"
+ "syscall32_sysenter_end:\n"
+ ".previous");
+
+extern unsigned char syscall32_int80[], syscall32_int80_end[];
+#else
+/* 32bit VDSOs mapped into user space. */
+asm(".section \".init.data\",\"aw\"\n"
+ "syscall32_syscall:\n"
+ ".incbin \"arch/xen/x86_64/ia32/vsyscall-syscall.so\"\n"
+ "syscall32_syscall_end:\n"
+ "syscall32_sysenter:\n"
+ ".incbin \"arch/xen/x86_64/ia32/vsyscall-sysenter.so\"\n"
+ "syscall32_sysenter_end:\n"
+ ".previous");
+
+static int use_sysenter = -1;
+#endif
+
+extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
+extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
+extern int sysctl_vsyscall32;
+
+char *syscall32_page;
+
+/*
+ * Map the 32bit vsyscall page on demand.
+ *
+ * RED-PEN: This knows too much about high level VM.
+ *
+ * Alternative would be to generate a vma with appropriate backing options
+ * and let it be handled by generic VM.
+ */
+int __map_syscall32(struct mm_struct *mm, unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pte_t *pte;
+ pmd_t *pmd;
+ int err = -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ pgd = pgd_offset(mm, address);
+ pud = pud_alloc(mm, pgd, address);
+ if (pud) {
+ pmd = pmd_alloc(mm, pud, address);
+ if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
+ if (pte_none(*pte)) {
+ set_pte(pte,
+ mk_pte(virt_to_page(syscall32_page),
+ PAGE_KERNEL_VSYSCALL32));
+ }
+ /* Flush only the local CPU. Other CPUs taking a fault
+ will just end up here again
+ This probably not needed and just paranoia. */
+ __flush_tlb_one(address);
+ err = 0;
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+ return err;
+}
+
+int map_syscall32(struct mm_struct *mm, unsigned long address)
+{
+ int err;
+ down_read(&mm->mmap_sem);
+ err = __map_syscall32(mm, address);
+ up_read(&mm->mmap_sem);
+ return err;
+}
+
+static int __init init_syscall32(void)
+{
+ syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!syscall32_page)
+ panic("Cannot allocate syscall32 page");
+ SetPageReserved(virt_to_page(syscall32_page));
+
+#ifdef USE_INT80
+ /*
+ * At this point we use int 0x80.
+ */
+ memcpy(syscall32_page, syscall32_int80,
+ syscall32_int80_end - syscall32_int80);
+#else
+
+ if (use_sysenter > 0) {
+ memcpy(syscall32_page, syscall32_sysenter,
+ syscall32_sysenter_end - syscall32_sysenter);
+ } else {
+ memcpy(syscall32_page, syscall32_syscall,
+ syscall32_syscall_end - syscall32_syscall);
+ }
+#endif
+ return 0;
+}
+
+__initcall(init_syscall32);
+
+/* May not be __init: called during resume */
+void syscall32_cpu_init(void)
+{
+#ifndef USE_INT80
+ if (use_sysenter < 0)
+ use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+ /* Load these always in case some future AMD CPU supports
+ SYSENTER from compat mode too. */
+ checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)(__KERNEL_CS | 3));
+ checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
+ checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+ wrmsrl(MSR_CSTAR, ia32_cstar_target);
+#endif
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S
new file mode 100644
index 0000000000..6ed66bd8ad
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S
@@ -0,0 +1,57 @@
+/*
+ * Code for the vsyscall page. This version uses the old int $0x80 method.
+ *
+ * NOTE:
+ * 1) __kernel_vsyscall _must_ be first in this page.
+ * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
+ * for details.
+ */
+#include <asm/ia32_unistd.h>
+#include <asm/offset.h>
+
+ .text
+ .section .text.vsyscall,"ax"
+ .globl __kernel_vsyscall
+ .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+ int $0x80
+ ret
+.LEND_vsyscall:
+ .size __kernel_vsyscall,.-.LSTART_vsyscall
+ .previous
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+ .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIE:
+
+ .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
+.LSTARTFDE1:
+ .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
+ .long .LSTART_vsyscall-. /* PC-relative start address */
+ .long .LEND_vsyscall-.LSTART_vsyscall
+ .uleb128 0 /* Augmentation length */
+ .align 4
+.LENDFDE1:
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#define SYSCALL_ENTER_KERNEL int $0x80
+#include "vsyscall-sigreturn.S"
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
index e01fdd064a..0285860d73 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
@@ -50,43 +50,8 @@
#include "irq_vectors.h"
-EVENT_MASK = (CS+4)
-VGCF_IN_SYSCALL = (1<<8)
-
-/*
- * Copied from arch/xen/i386/kernel/entry.S
- */
-/* Offsets into shared_info_t. */
-#define evtchn_upcall_pending 0
-#define evtchn_upcall_mask 1
-
-#define sizeof_vcpu_shift 3
-
-#ifdef CONFIG_SMP
-#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
-#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \
- movq %gs:pda_cpunumber,reg ; \
- shl $sizeof_vcpu_shift,reg ; \
- addq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \
-#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
-#else
-#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg)
-#define XEN_PUT_VCPU_INFO_fixup
-#endif
-
-#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
- XEN_LOCKED_BLOCK_EVENTS(reg) ; \
- XEN_PUT_VCPU_INFO(reg)
-#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
- XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \
- XEN_PUT_VCPU_INFO(reg)
-#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
-
+#include "xen_entry.S"
+
.code64
#ifdef CONFIG_PREEMPT
@@ -172,8 +137,6 @@ VGCF_IN_SYSCALL = (1<<8)
movq %r11,1*8(%rsp)
movq %rcx,2*8(%rsp) # we saved %rcx upon exceptions
movq $\flag,3*8(%rsp)
- movq $__USER_CS,5*8(%rsp)
- movq $__USER_DS,8*8(%rsp)
movq $__HYPERVISOR_switch_to_user,%rax
syscall
.endm
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
index 9111fb80d0..855128a4b2 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
@@ -62,7 +62,6 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
cpumask_t mask;
-
preempt_disable();
#endif
make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
@@ -73,8 +72,6 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
if (!cpus_equal(current->mm->cpu_vm_mask, mask))
smp_call_function(flush_ldt, NULL, 1, 1);
preempt_enable();
-#else
- load_LDT(pc);
#endif
}
if (oldsize) {
@@ -188,13 +185,12 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
{
struct task_struct *me = current;
struct mm_struct * mm = me->mm;
- unsigned long entry = 0, *lp;
+ __u32 entry_1, entry_2, *lp;
unsigned long mach_lp;
int error;
struct user_desc ldt_info;
error = -EINVAL;
-
if (bytecount != sizeof(ldt_info))
goto out;
error = -EFAULT;
@@ -218,26 +214,26 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
goto out_unlock;
}
- lp = (unsigned long *)((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
- mach_lp = arbitrary_virt_to_machine(lp);
+ lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+ mach_lp = arbitrary_virt_to_machine(lp);
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
- entry = 0;
+ entry_1 = 0;
+ entry_2 = 0;
goto install;
}
}
-#if 0
- entry = LDT_entry(&ldt_info);
-#endif
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
if (oldmode)
- entry &= ~(1 << 20);
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
- error = HYPERVISOR_update_descriptor(mach_lp, entry);
+ error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32)));
out_unlock:
up(&mm->context.sem);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
index 973d7350c6..8502089472 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -931,6 +931,9 @@ static trap_info_t trap_table[] = {
#endif
{ 19, 0, (__KERNEL_CS|0x3), 0, (unsigned long)simd_coprocessor_error },
{ SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), 0, (unsigned long)system_call },
+#ifdef CONFIG_IA32_EMULATION
+ { IA32_SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), 0, (unsigned long)ia32_syscall},
+#endif
{ 0, 0, 0, 0, 0 }
};
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
new file mode 100644
index 0000000000..e2bd1b561b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
@@ -0,0 +1,38 @@
+/*
+ * Copied from arch/xen/i386/kernel/entry.S
+ */
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending 0
+#define evtchn_upcall_mask 1
+
+#define sizeof_vcpu_shift 3
+
+#ifdef CONFIG_SMP
+#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
+#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \
+ movq %gs:pda_cpunumber,reg ; \
+ shl $sizeof_vcpu_shift,reg ; \
+ addq HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \
+#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
+#else
+#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg)
+#define XEN_PUT_VCPU_INFO_fixup
+#endif
+
+#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
+#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
+#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
+ XEN_LOCKED_BLOCK_EVENTS(reg) ; \
+ XEN_PUT_VCPU_INFO(reg)
+#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
+ XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \
+ XEN_PUT_VCPU_INFO(reg)
+#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
+
+EVENT_MASK = (CS+4)
+VGCF_IN_SYSCALL = (1<<8)
+
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
index 636cd1873f..8b42292232 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
@@ -563,6 +563,7 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
+out_of_memory2:
if (current->pid == 1) {
yield();
goto again;
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
index 208b00353f..0ae6f7a40a 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
@@ -1343,7 +1343,7 @@ static int xennet_proc_read(
{
struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
struct net_private *np = netdev_priv(dev);
- int len = 0, which_target = (unsigned long) data & 3;
+ int len = 0, which_target = (long)data & 3;
switch (which_target)
{
@@ -1368,7 +1368,7 @@ static int xennet_proc_write(
{
struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
struct net_private *np = netdev_priv(dev);
- int which_target = (unsigned long) data & 3;
+ int which_target = (long)data & 3;
char string[64];
long target;
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
index 4e487a06d6..dc194ff88e 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
@@ -33,20 +33,40 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
+ struct mmuext_op _op[3], *op = _op;
+
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
+#if 0 /* XEN: no lazy tlb */
write_pda(mmu_state, TLBSTATE_OK);
write_pda(active_mm, next);
#endif
set_bit(cpu, &next->cpu_vm_mask);
- load_cr3(next->pgd);
- xen_new_user_pt(__pa(__user_pgd(next->pgd)));
- if (unlikely(next->context.ldt != prev->context.ldt))
- load_LDT_nolock(&next->context, cpu);
+
+ /* load_cr3(next->pgd) */
+ per_cpu(cur_pgd, smp_processor_id()) = next->pgd;
+ op->cmd = MMUEXT_NEW_BASEPTR;
+ op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+ op++;
+
+ /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
+ op->cmd = MMUEXT_NEW_USER_BASEPTR;
+ op->mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT);
+ op++;
+
+ if (unlikely(next->context.ldt != prev->context.ldt)) {
+ /* load_LDT_nolock(&next->context, cpu) */
+ op->cmd = MMUEXT_SET_LDT;
+ op->linear_addr = (unsigned long)next->context.ldt;
+ op->nr_ents = next->context.size;
+ op++;
+ }
+
+ BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
}
-#ifdef CONFIG_SMP
+
+#if 0 /* XEN: no lazy tlb */
else {
write_pda(mmu_state, TLBSTATE_OK);
if (read_pda(active_mm) != next)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
index 5b86bd7978..9745edc23b 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
@@ -30,7 +30,20 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
extern unsigned long pgkern_mask;
-#define arbitrary_virt_to_machine(__va) ({0;})
+#define virt_to_ptep(__va) \
+({ \
+ pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \
+ pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \
+ pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \
+ pte_offset_kernel(__pmd, (unsigned long)(__va)); \
+})
+
+#define arbitrary_virt_to_machine(__va) \
+({ \
+ pte_t *__pte = virt_to_ptep(__va); \
+ unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \
+ __pa | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
+})
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -210,6 +223,7 @@ static inline pte_t ptep_get_and_clear(pte_t *xp)
#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
diff --git a/tools/ioemu/exec.c b/tools/ioemu/exec.c
index c49975c3c1..e9cdf243e2 100644
--- a/tools/ioemu/exec.c
+++ b/tools/ioemu/exec.c
@@ -386,6 +386,9 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
l = 2;
} else {
+ if (l!=1){
+ fprintf(logfile, "ERROR 8 bit mmio\n");
+ }
/* 8 bit access */
val = ldub_raw(buf);
io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h
index 09eff6675f..46802edfb2 100644
--- a/tools/libxc/xc.h
+++ b/tools/libxc/xc.h
@@ -25,6 +25,7 @@ typedef int64_t s64;
#include <xen/dom0_ops.h>
#include <xen/event_channel.h>
#include <xen/sched_ctl.h>
+#include <xen/acm.h>
/*
* DEFINITIONS FOR CPU BARRIERS
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 2edf11c39d..5cd6b43e8e 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -79,6 +79,8 @@ int xc_domain_getinfo(int xc_handle,
dom0_op_t op;
int rc = 0;
+ memset(info, 0, max_doms*sizeof(xc_dominfo_t));
+
for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ )
{
op.cmd = DOM0_GETDOMAININFO;
@@ -128,7 +130,7 @@ int xc_domain_get_vcpu_context(int xc_handle,
u32 vcpu,
vcpu_guest_context_t *ctxt)
{
- int rc, errno_saved;
+ int rc;
dom0_op_t op;
op.cmd = DOM0_GETVCPUCONTEXT;
@@ -143,11 +145,7 @@ int xc_domain_get_vcpu_context(int xc_handle,
rc = do_dom0_op(xc_handle, &op);
if ( ctxt != NULL )
- {
- errno_saved = errno;
- (void)munlock(ctxt, sizeof(*ctxt));
- errno = errno_saved;
- }
+ safe_munlock(ctxt, sizeof(*ctxt));
if ( rc > 0 )
return -ESRCH;
diff --git a/tools/libxc/xc_evtchn.c b/tools/libxc/xc_evtchn.c
index 1c0294d83b..74e8468dd5 100644
--- a/tools/libxc/xc_evtchn.c
+++ b/tools/libxc/xc_evtchn.c
@@ -26,7 +26,7 @@ static int do_evtchn_op(int xc_handle, evtchn_op_t *op)
if ((ret = do_xen_hypercall(xc_handle, &hypercall)) < 0)
ERROR("do_evtchn_op: HYPERVISOR_event_channel_op failed: %d", ret);
- (void)munlock(op, sizeof(*op));
+ safe_munlock(op, sizeof(*op));
out:
return ret;
}
diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
index ad23e68013..409539cb14 100644
--- a/tools/libxc/xc_gnttab.c
+++ b/tools/libxc/xc_gnttab.c
@@ -33,7 +33,7 @@ do_gnttab_op( int xc_handle,
if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
ERROR("do_gnttab_op: HYPERVISOR_grant_table_op failed: %d", ret);
- (void)munlock(op, sizeof(*op));
+ safe_munlock(op, sizeof(*op));
out:
return ret;
}
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index 660c30a3f1..3692dbd5e5 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -527,8 +527,6 @@ int xc_linux_build(int xc_handle,
if ( image != NULL )
free(image);
- ctxt->flags = 0;
-
/*
* Initial register values:
* DS,ES,FS,GS = FLAT_KERNEL_DS
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 40291bc3ef..ac306b1585 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -43,7 +43,7 @@ int xc_readconsolering(int xc_handle,
*pnr_chars = op.u.readconsole.count;
}
- (void)munlock(buffer, nr_chars);
+ safe_munlock(buffer, nr_chars);
return ret;
}
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index 87e5ecd1f3..fe8f42c1c8 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -22,8 +22,10 @@ void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
ioctlx.arr=arr;
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 )
{
+ int saved_errno = errno;
perror("XXXXXXXX");
- munmap(addr, num*PAGE_SIZE);
+ (void)munmap(addr, num*PAGE_SIZE);
+ errno = saved_errno;
return NULL;
}
return addr;
@@ -51,7 +53,9 @@ void *xc_map_foreign_range(int xc_handle, u32 dom,
entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
{
- munmap(addr, size);
+ int saved_errno = errno;
+ (void)munmap(addr, size);
+ errno = saved_errno;
return NULL;
}
return addr;
@@ -134,8 +138,8 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
}
mmu->idx = 0;
-
- (void)munlock(mmu->updates, sizeof(mmu->updates));
+
+ safe_munlock(mmu->updates, sizeof(mmu->updates));
out:
return err;
@@ -232,7 +236,7 @@ int xc_get_pfn_list(int xc_handle,
ret = do_dom0_op(xc_handle, &op);
- (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
+ safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long));
#if 0
#ifdef DEBUG
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index baf1e5f26d..c50813ee3c 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -101,12 +101,28 @@ struct load_funcs
loadimagefunc loadimage;
};
-#define ERROR(_m, _a...) \
- fprintf(stderr, "ERROR: " _m "\n" , ## _a )
-
-#define PERROR(_m, _a...) \
- fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \
- errno, strerror(errno))
+#define ERROR(_m, _a...) \
+do { \
+ int __saved_errno = errno; \
+ fprintf(stderr, "ERROR: " _m "\n" , ## _a ); \
+ errno = __saved_errno; \
+} while (0)
+
+
+#define PERROR(_m, _a...) \
+do { \
+ int __saved_errno = errno; \
+ fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \
+ __saved_errno, strerror(__saved_errno)); \
+ errno = __saved_errno; \
+} while (0)
+
+static inline void safe_munlock(const void *addr, size_t len)
+{
+ int saved_errno = errno;
+ (void)munlock(addr, len);
+ errno = saved_errno;
+}
static inline int do_privcmd(int xc_handle,
unsigned int cmd,
@@ -125,7 +141,7 @@ static inline int do_xen_hypercall(int xc_handle,
static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
{
- int ret = -1, errno_saved;
+ int ret = -1;
privcmd_hypercall_t hypercall;
op->interface_version = DOM0_INTERFACE_VERSION;
@@ -146,9 +162,7 @@ static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
" rebuild the user-space tool set?\n");
}
- errno_saved = errno;
- (void)munlock(op, sizeof(*op));
- errno = errno_saved;
+ safe_munlock(op, sizeof(*op));
out1:
return ret;
@@ -163,7 +177,6 @@ static inline int do_dom_mem_op(int xc_handle,
{
privcmd_hypercall_t hypercall;
long ret = -EINVAL;
- int errno_saved;
hypercall.op = __HYPERVISOR_dom_mem_op;
hypercall.arg[0] = (unsigned long)memop;
@@ -186,11 +199,7 @@ static inline int do_dom_mem_op(int xc_handle,
}
if ( extent_list != NULL )
- {
- errno_saved = errno;
- (void)munlock(extent_list, nr_extents*sizeof(unsigned long));
- errno = errno_saved;
- }
+ safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
out1:
return ret;
@@ -204,7 +213,6 @@ static inline int do_mmuext_op(
{
privcmd_hypercall_t hypercall;
long ret = -EINVAL;
- int errno_saved;
hypercall.op = __HYPERVISOR_mmuext_op;
hypercall.arg[0] = (unsigned long)op;
@@ -224,9 +232,7 @@ static inline int do_mmuext_op(
" rebuild the user-space tool set?\n",ret,errno);
}
- errno_saved = errno;
- (void)munlock(op, nr_ops*sizeof(*op));
- errno = errno_saved;
+ safe_munlock(op, nr_ops*sizeof(*op));
out1:
return ret;
diff --git a/tools/libxc/xc_ptrace.c b/tools/libxc/xc_ptrace.c
index 1db45a7bbb..a8b39a648d 100644
--- a/tools/libxc/xc_ptrace.c
+++ b/tools/libxc/xc_ptrace.c
@@ -3,6 +3,8 @@
#include "xc_private.h"
#include <time.h>
+#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
+#define X86_CR0_PG 0x80000000 /* Paging (RW) */
#define BSD_PAGE_MASK (PAGE_SIZE-1)
#define PG_FRAME (~((unsigned long)BSD_PAGE_MASK)
@@ -132,6 +134,13 @@ static int regs_valid[MAX_VIRT_CPUS];
static unsigned long cr3[MAX_VIRT_CPUS];
static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
+static inline int paging_enabled(vcpu_guest_context_t *v)
+{
+ unsigned long cr0 = v->cr0;
+
+ return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
+}
+
/* --------------------- */
static void *
@@ -179,7 +188,7 @@ map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
}
if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
goto error_out;
- if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ if ((ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]))
pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
if (pde != pde_phys[cpu])
{
diff --git a/tools/policy/policy_tool.c b/tools/policy/policy_tool.c
index 696a70c282..9c2f7a5a5b 100644
--- a/tools/policy/policy_tool.c
+++ b/tools/policy/policy_tool.c
@@ -14,7 +14,7 @@
*
* sHype policy management tool. This code runs in a domain and
* manages the Xen security policy by interacting with the
- * Xen access control module via a /proc/xen/policycmd proc-ioctl,
+ * Xen access control module via a /proc/xen/privcmd proc-ioctl,
* which is translated into a policy_op hypercall into Xen.
*
* todo: implement setpolicy to dynamically set a policy cache.
@@ -229,7 +229,6 @@ void acm_dump_policy_buffer(void *buf, int buflen) {
default:
printf("UNKNOWN POLICY!\n");
}
- printf("\nPolicy dump End.\n\n");
}
/*************************** set policy ****************************/
@@ -519,39 +518,35 @@ usage(char *progname){
int
main(int argc, char **argv) {
- int policycmd_fd;
+ int policycmd_fd, ret;
- if (argc < 2)
+ if (argc < 2)
usage(argv[0]);
if ((policycmd_fd = open("/proc/xen/privcmd", O_RDONLY)) <= 0) {
- printf("ERROR: Could not open xen policycmd device!\n");
+ printf("ERROR: Could not open xen privcmd device!\n");
exit(-1);
}
if (!strcmp(argv[1], "setpolicy")) {
if (argc != 2)
usage(argv[0]);
- acm_domain_setpolicy(policycmd_fd);
-
+ ret = acm_domain_setpolicy(policycmd_fd);
} else if (!strcmp(argv[1], "getpolicy")) {
if (argc != 2)
usage(argv[0]);
- acm_domain_getpolicy(policycmd_fd);
-
+ ret = acm_domain_getpolicy(policycmd_fd);
} else if (!strcmp(argv[1], "loadpolicy")) {
if (argc != 3)
usage(argv[0]);
- acm_domain_loadpolicy(policycmd_fd, argv[2]);
-
+ ret = acm_domain_loadpolicy(policycmd_fd, argv[2]);
} else if (!strcmp(argv[1], "dumpstats")) {
if (argc != 2)
usage(argv[0]);
- acm_domain_dumpstats(policycmd_fd);
-
+ ret = acm_domain_dumpstats(policycmd_fd);
} else
usage(argv[0]);
close(policycmd_fd);
- return 0;
+ return ret;
}
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index d2219f9668..f218ddd912 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -121,7 +121,7 @@ gopts.var('memory', val='MEMORY',
use="Domain memory in MB.")
gopts.var('ssidref', val='SSIDREF',
- fn=set_u32, default=0xffffffff,
+ fn=set_u32, default=-1,
use="Security Identifier.")
gopts.var('maxmem', val='MEMORY',
@@ -618,6 +618,7 @@ def main(argv):
config = opts.vals.config
else:
opts.load_defconfig()
+ opts.vals.vnc = not opts.vals.dryrun
preprocess(opts, opts.vals)
if not opts.getopt('name') and opts.getopt('defconfig'):
opts.setopt('name', os.path.basename(opts.getopt('defconfig')))
diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
index 2a269f11b6..fdefebd4b6 100644
--- a/xen/arch/x86/dom0_ops.c
+++ b/xen/arch/x86/dom0_ops.c
@@ -393,8 +393,11 @@ void arch_getdomaininfo_ctxt(
#ifdef __i386__
#ifdef CONFIG_VMX
- if ( VMX_DOMAIN(v) )
+ if ( VMX_DOMAIN(v) ) {
save_vmx_cpu_user_regs(&c->user_regs);
+ __vmread(CR0_READ_SHADOW, &c->cr0);
+ __vmread(CR4_READ_SHADOW, &c->cr4);
+ }
#endif
#endif
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 0487da6a44..52b4048909 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -288,7 +288,7 @@ int map_ldt_shadow_page(unsigned int off)
struct domain *d = v->domain;
unsigned long gpfn, gmfn;
l1_pgentry_t l1e, nl1e;
- unsigned gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
+ unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
int res;
#if defined(__x86_64__)
@@ -753,7 +753,7 @@ static inline int l1_backptr(
#else
# define create_pae_xen_mappings(pl3e) (1)
# define l1_backptr(bp,l2o,l2t) \
- ({ *(bp) = (l2o) << L2_PAGETABLE_SHIFT; 1; })
+ ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
#endif
static int alloc_l2_table(struct pfn_info *page, unsigned int type)
@@ -821,7 +821,7 @@ static int alloc_l3_table(struct pfn_info *page)
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
- vaddr = i << L3_PAGETABLE_SHIFT;
+ vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
if ( is_guest_l3_slot(i) &&
unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index bb0da8c42f..53690b8dbe 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -262,7 +262,8 @@ void __init __start_xen(multiboot_info_t *mbi)
/* Check that we have at least one Multiboot module. */
if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
{
- printk("FATAL ERROR: Require at least one Multiboot module.\n");
+ printk("FATAL ERROR: dom0 kernel not specified."
+ " Check bootloader configuration.\n");
EARLY_FAIL();
}
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
index d4a16823eb..98ad41e5be 100644
--- a/xen/arch/x86/vmx.c
+++ b/xen/arch/x86/vmx.c
@@ -1009,8 +1009,23 @@ static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
(unsigned long)regs->ecx, (unsigned long)regs->eax,
(unsigned long)regs->edx);
-
- rdmsr(regs->ecx, regs->eax, regs->edx);
+ switch (regs->ecx) {
+ case MSR_IA32_SYSENTER_CS:
+ __vmread(GUEST_SYSENTER_CS, &regs->eax);
+ regs->edx = 0;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ __vmread(GUEST_SYSENTER_ESP, &regs->eax);
+ regs->edx = 0;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ __vmread(GUEST_SYSENTER_EIP, &regs->eax);
+ regs->edx = 0;
+ break;
+ default:
+ rdmsr(regs->ecx, regs->eax, regs->edx);
+ break;
+ }
VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
"ecx=%lx, eax=%lx, edx=%lx",
@@ -1018,6 +1033,31 @@ static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
(unsigned long)regs->edx);
}
+static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
+{
+ VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
+ (unsigned long)regs->ecx, (unsigned long)regs->eax,
+ (unsigned long)regs->edx);
+ switch (regs->ecx) {
+ case MSR_IA32_SYSENTER_CS:
+ __vmwrite(GUEST_SYSENTER_CS, regs->eax);
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ __vmwrite(GUEST_SYSENTER_ESP, regs->eax);
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ __vmwrite(GUEST_SYSENTER_EIP, regs->eax);
+ break;
+ default:
+ break;
+ }
+
+ VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
+ "ecx=%lx, eax=%lx, edx=%lx",
+ (unsigned long)regs->ecx, (unsigned long)regs->eax,
+ (unsigned long)regs->edx);
+}
+
/*
* Need to use this exit to reschedule
*/
@@ -1332,9 +1372,7 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
break;
case EXIT_REASON_MSR_WRITE:
__vmread(GUEST_RIP, &eip);
- VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%lx, eax=%lx, edx=%lx",
- eip, (unsigned long)regs.eax, (unsigned long)regs.edx);
- /* just ignore this point */
+ vmx_do_msr_write(&regs);
__get_instruction_length(inst_len);
__update_guest_eip(inst_len);
break;
diff --git a/xen/arch/x86/vmx_intercept.c b/xen/arch/x86/vmx_intercept.c
index cd7e464904..e64b626d29 100644
--- a/xen/arch/x86/vmx_intercept.c
+++ b/xen/arch/x86/vmx_intercept.c
@@ -214,6 +214,14 @@ void vmx_hooks_assist(struct vcpu *d)
/* load init count*/
if (p->state == STATE_IORESP_HOOK) {
+ /* set up actimer, handle re-init */
+ if ( active_ac_timer(&(vpit->pit_timer)) ) {
+ VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: guest reset PIT with channel %lx!\n", (unsigned long) ((p->u.data >> 24) & 0x3) );
+ rem_ac_timer(&(vpit->pit_timer));
+ }
+ else
+ init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, 0);
+
/* init count for this channel */
vpit->init_val = (p->u.data & 0xFFFF) ;
/* frequency(ms) of pit */
@@ -248,9 +256,7 @@ void vmx_hooks_assist(struct vcpu *d)
vpit->intr_bitmap = intr;
- /* set up the actimer */
- init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, 0);
- pit_timer_fn(vpit); /* timer seed */
+ set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period));
/*restore the state*/
p->state = STATE_IORESP_READY;
diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c
index 370c3bb5db..905df193b4 100644
--- a/xen/arch/x86/vmx_io.c
+++ b/xen/arch/x86/vmx_io.c
@@ -86,7 +86,8 @@ static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *r
regs->ebx |= ((value & 0xFF) << 8);
break;
default:
- printk("size:%x, index:%x are invalid!\n", size, index);
+ printk("Error: size:%x, index:%x are invalid!\n", size, index);
+ domain_crash_synchronous();
break;
}
@@ -127,7 +128,8 @@ static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *r
regs->edi |= (value & 0xFFFF);
break;
default:
- printk("size:%x, index:%x are invalid!\n", size, index);
+ printk("Error: size:%x, index:%x are invalid!\n", size, index);
+ domain_crash_synchronous();
break;
}
break;
@@ -158,25 +160,150 @@ static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *r
regs->edi = value;
break;
default:
- printk("size:%x, index:%x are invalid!\n", size, index);
+ printk("Error: size:%x, index:%x are invalid!\n", size, index);
+ domain_crash_synchronous();
break;
}
break;
default:
- printk("size:%x, index:%x are invalid!\n", size, index);
+ printk("Error: size:%x, index:%x are invalid!\n", size, index);
+ domain_crash_synchronous();
break;
}
}
#else
static void load_cpu_user_regs(struct cpu_user_regs *regs)
-{
- /* XXX: TBD */
- return;
+{
+ __vmwrite(GUEST_SS_SELECTOR, regs->ss);
+ __vmwrite(GUEST_RSP, regs->rsp);
+ __vmwrite(GUEST_RFLAGS, regs->rflags);
+ __vmwrite(GUEST_CS_SELECTOR, regs->cs);
+ __vmwrite(GUEST_RIP, regs->rip);
}
+
+static inline void __set_reg_value(long *reg, int size, long value)
+{
+ switch (size) {
+ case BYTE_64:
+ *reg &= ~0xFF;
+ *reg |= (value & 0xFF);
+ break;
+ case WORD:
+ *reg &= ~0xFFFF;
+ *reg |= (value & 0xFFFF);
+ break;
+
+ case LONG:
+ *reg &= ~0xFFFFFFFF;
+ *reg |= (value & 0xFFFFFFFF);
+ break;
+ case QUAD:
+ *reg = value;
+ break;
+ default:
+ printk("Error: <__set_reg_value> : Unknown size for register\n");
+ domain_crash_synchronous();
+ }
+}
+
static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
{
- /* XXX: TBD */
- return;
+ if (size == BYTE) {
+ switch (index) {
+ case 0:
+ regs->rax &= ~0xFF;
+ regs->rax |= (value & 0xFF);
+ break;
+ case 1:
+ regs->rcx &= ~0xFF;
+ regs->rcx |= (value & 0xFF);
+ break;
+ case 2:
+ regs->rdx &= ~0xFF;
+ regs->rdx |= (value & 0xFF);
+ break;
+ case 3:
+ regs->rbx &= ~0xFF;
+ regs->rbx |= (value & 0xFF);
+ break;
+ case 4:
+ regs->rax &= 0xFFFFFFFFFFFF00FF;
+ regs->rax |= ((value & 0xFF) << 8);
+ break;
+ case 5:
+ regs->rcx &= 0xFFFFFFFFFFFF00FF;
+ regs->rcx |= ((value & 0xFF) << 8);
+ break;
+ case 6:
+ regs->rdx &= 0xFFFFFFFFFFFF00FF;
+ regs->rdx |= ((value & 0xFF) << 8);
+ break;
+ case 7:
+ regs->rbx &= 0xFFFFFFFFFFFF00FF;
+ regs->rbx |= ((value & 0xFF) << 8);
+ break;
+ default:
+ printk("Error: size:%x, index:%x are invalid!\n", size, index);
+ domain_crash_synchronous();
+ break;
+ }
+
+ }
+
+ switch (index) {
+ case 0:
+ __set_reg_value(&regs->rax, size, value);
+ break;
+ case 1:
+ __set_reg_value(&regs->rcx, size, value);
+ break;
+ case 2:
+ __set_reg_value(&regs->rdx, size, value);
+ break;
+ case 3:
+ __set_reg_value(&regs->rbx, size, value);
+ break;
+ case 4:
+ __set_reg_value(&regs->rsp, size, value);
+ break;
+ case 5:
+ __set_reg_value(&regs->rbp, size, value);
+ break;
+ case 6:
+ __set_reg_value(&regs->rsi, size, value);
+ break;
+ case 7:
+ __set_reg_value(&regs->rdi, size, value);
+ break;
+ case 8:
+ __set_reg_value(&regs->r8, size, value);
+ break;
+ case 9:
+ __set_reg_value(&regs->r9, size, value);
+ break;
+ case 10:
+ __set_reg_value(&regs->r10, size, value);
+ break;
+ case 11:
+ __set_reg_value(&regs->r11, size, value);
+ break;
+ case 12:
+ __set_reg_value(&regs->r12, size, value);
+ break;
+ case 13:
+ __set_reg_value(&regs->r13, size, value);
+ break;
+ case 14:
+ __set_reg_value(&regs->r14, size, value);
+ break;
+ case 15:
+ __set_reg_value(&regs->r15, size, value);
+ break;
+ default:
+ printk("Error: <set_reg_value> Invalid index\n");
+ domain_crash_synchronous();
+ }
+ return;
}
#endif
@@ -269,7 +396,8 @@ void vmx_io_assist(struct vcpu *v)
regs->eax = (p->u.data & 0xffffffff);
break;
default:
- BUG();
+ printk("Error: %s unknown port size\n", __FUNCTION__);
+ domain_crash_synchronous();
}
}
diff --git a/xen/arch/x86/vmx_platform.c b/xen/arch/x86/vmx_platform.c
index 339f4da59b..36a95f5117 100644
--- a/xen/arch/x86/vmx_platform.c
+++ b/xen/arch/x86/vmx_platform.c
@@ -41,12 +41,79 @@
#if defined (__x86_64__)
void store_cpu_user_regs(struct cpu_user_regs *regs)
{
+ __vmread(GUEST_SS_SELECTOR, &regs->ss);
+ __vmread(GUEST_RSP, &regs->rsp);
+ __vmread(GUEST_RFLAGS, &regs->rflags);
+ __vmread(GUEST_CS_SELECTOR, &regs->cs);
+ __vmread(GUEST_DS_SELECTOR, &regs->ds);
+ __vmread(GUEST_ES_SELECTOR, &regs->es);
+ __vmread(GUEST_RIP, &regs->rip);
+}
+static inline long __get_reg_value(unsigned long reg, int size)
+{
+ switch(size) {
+ case BYTE_64:
+ return (char)(reg & 0xFF);
+ case WORD:
+ return (short)(reg & 0xFFFF);
+ case LONG:
+ return (int)(reg & 0xFFFFFFFF);
+ case QUAD:
+ return (long)(reg);
+ default:
+ printk("Error: <__get_reg_value>Invalid reg size\n");
+ domain_crash_synchronous();
+ }
}
static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
{
- return 0;
+ if (size == BYTE) {
+ switch (index) {
+ case 0: //%al
+ return (char)(regs->rax & 0xFF);
+ case 1: //%cl
+ return (char)(regs->rcx & 0xFF);
+ case 2: //%dl
+ return (char)(regs->rdx & 0xFF);
+ case 3: //%bl
+ return (char)(regs->rbx & 0xFF);
+ case 4: //%ah
+ return (char)((regs->rax & 0xFF00) >> 8);
+ case 5: //%ch
+ return (char)((regs->rcx & 0xFF00) >> 8);
+ case 6: //%dh
+ return (char)((regs->rdx & 0xFF00) >> 8);
+ case 7: //%bh
+ return (char)((regs->rbx & 0xFF00) >> 8);
+ default:
+ printk("Error: (get_reg_value)Invalid index value\n");
+ domain_crash_synchronous();
+ }
+
+ }
+ switch (index) {
+ case 0: return __get_reg_value(regs->rax, size);
+ case 1: return __get_reg_value(regs->rcx, size);
+ case 2: return __get_reg_value(regs->rdx, size);
+ case 3: return __get_reg_value(regs->rbx, size);
+ case 4: return __get_reg_value(regs->rsp, size);
+ case 5: return __get_reg_value(regs->rbp, size);
+ case 6: return __get_reg_value(regs->rsi, size);
+ case 7: return __get_reg_value(regs->rdi, size);
+ case 8: return __get_reg_value(regs->r8, size);
+ case 9: return __get_reg_value(regs->r9, size);
+ case 10: return __get_reg_value(regs->r10, size);
+ case 11: return __get_reg_value(regs->r11, size);
+ case 12: return __get_reg_value(regs->r12, size);
+ case 13: return __get_reg_value(regs->r13, size);
+ case 14: return __get_reg_value(regs->r14, size);
+ case 15: return __get_reg_value(regs->r15, size);
+ default:
+ printk("Error: (get_reg_value)Invalid index value\n");
+ domain_crash_synchronous();
+ }
}
#elif defined (__i386__)
void store_cpu_user_regs(struct cpu_user_regs *regs)
@@ -85,8 +152,8 @@ static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *re
case 7: //%bh
return (char)((regs->ebx & 0xFF00) >> 8);
default:
- printk("(get_reg_value)size case 0 error\n");
- return -1;
+ printk("Error: (get_reg_value)size case 0 error\n");
+ domain_crash_synchronous();
}
case WORD:
switch (index) {
@@ -108,8 +175,8 @@ static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *re
case 7: //%di
return (short)(regs->edi & 0xFFFF);
default:
- printk("(get_reg_value)size case 1 error\n");
- return -1;
+ printk("Error: (get_reg_value)size case 1 error\n");
+ domain_crash_synchronous();
}
case LONG:
switch (index) {
@@ -131,42 +198,47 @@ static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *re
case 7: //%edi
return regs->edi;
default:
- printk("(get_reg_value)size case 2 error\n");
- return -1;
+ printk("Error: (get_reg_value)size case 2 error\n");
+ domain_crash_synchronous();
}
default:
- printk("(get_reg_value)size case error\n");
- return -1;
+ printk("Error: (get_reg_value)size case error\n");
+ domain_crash_synchronous();
}
}
#endif
-static inline unsigned char *check_prefix(unsigned char *inst, struct instruction *thread_inst)
+static inline const unsigned char *check_prefix(const unsigned char *inst, struct instruction *thread_inst, unsigned char *rex_p)
{
while (1) {
switch (*inst) {
+ /* rex prefix for em64t instructions*/
+ case 0x40 ... 0x4f:
+ *rex_p = *inst;
+ break;
+
case 0xf3: //REPZ
- thread_inst->flags = REPZ;
- break;
+ thread_inst->flags = REPZ;
+ break;
case 0xf2: //REPNZ
- thread_inst->flags = REPNZ;
- break;
+ thread_inst->flags = REPNZ;
+ break;
case 0xf0: //LOCK
- break;
+ break;
case 0x2e: //CS
case 0x36: //SS
case 0x3e: //DS
case 0x26: //ES
case 0x64: //FS
case 0x65: //GS
- thread_inst->seg_sel = *inst;
+ thread_inst->seg_sel = *inst;
break;
case 0x66: //32bit->16bit
thread_inst->op_size = WORD;
break;
case 0x67:
- printf("Not handling 0x67 (yet)\n");
- domain_crash_synchronous();
+ printf("Error: Not handling 0x67 (yet)\n");
+ domain_crash_synchronous();
break;
default:
return inst;
@@ -193,9 +265,9 @@ static inline unsigned long get_immediate(int op16, const unsigned char *inst, i
switch(mod) {
case 0:
if (rm == 5) {
- if (op16)
+ if (op16)
inst = inst + 2; //disp16, skip 2 bytes
- else
+ else
inst = inst + 4; //disp32, skip 4 bytes
}
break;
@@ -203,44 +275,81 @@ static inline unsigned long get_immediate(int op16, const unsigned char *inst, i
inst++; //disp8, skip 1 byte
break;
case 2:
- if (op16)
+ if (op16)
inst = inst + 2; //disp16, skip 2 bytes
- else
+ else
inst = inst + 4; //disp32, skip 4 bytes
break;
}
+
+ if (op_size == QUAD)
+ op_size = LONG;
+
for (i = 0; i < op_size; i++) {
val |= (*inst++ & 0xff) << (8 * i);
}
-
+
return val;
}
-static inline int get_index(const unsigned char *inst)
+static inline int get_index(const unsigned char *inst, unsigned char rex)
{
int mod, reg, rm;
+ int rex_r, rex_b;
mod = (*inst >> 6) & 3;
reg = (*inst >> 3) & 7;
rm = *inst & 7;
+ rex_r = (rex >> 2) & 1;
+ rex_b = rex & 1;
+
//Only one operand in the instruction is register
if (mod == 3) {
- return rm;
+ return (rm + (rex_b << 3));
} else {
- return reg;
+ return (reg + (rex_r << 3));
}
return 0;
}
+static void init_instruction(struct instruction *mmio_inst)
+{
+ memset(mmio_inst->i_name, '0', I_NAME_LEN);
+ mmio_inst->op_size = 0;
+ mmio_inst->offset = 0;
+ mmio_inst->immediate = 0;
+ mmio_inst->seg_sel = 0;
+ mmio_inst->op_num = 0;
+
+ mmio_inst->operand[0] = 0;
+ mmio_inst->operand[1] = 0;
+ mmio_inst->operand[2] = 0;
+
+ mmio_inst->flags = 0;
+}
+
+#define GET_OP_SIZE_FOR_BYTE(op_size) \
+ do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0)
+
+#define GET_OP_SIZE_FOR_NONEBYTE(op_size) \
+ do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = LONG;} while(0)
+
static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst)
{
unsigned long eflags;
int index, vm86 = 0;
+ unsigned char rex = 0;
+ unsigned char tmp_size = 0;
+
+
+ init_instruction(thread_inst);
+
+ inst = check_prefix(inst, thread_inst, &rex);
__vmread(GUEST_RFLAGS, &eflags);
if (eflags & X86_EFLAGS_VM)
- vm86 = 1;
+ vm86 = 1;
if (vm86) { /* meaning is reversed */
if (thread_inst->op_size == WORD)
@@ -255,34 +364,30 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
case 0x88:
/* mov r8 to m8 */
thread_inst->op_size = BYTE;
- index = get_index((inst + 1));
- thread_inst->operand[0] = mk_operand(BYTE, index, 0, REGISTER);
+ index = get_index((inst + 1), rex);
+ GET_OP_SIZE_FOR_BYTE(tmp_size);
+ thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER);
+
break;
case 0x89:
/* mov r32/16 to m32/16 */
- index = get_index((inst + 1));
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[0] = mk_operand(WORD, index, 0, REGISTER);
- } else {
- thread_inst->op_size = LONG;
- thread_inst->operand[0] = mk_operand(LONG, index, 0, REGISTER);
- }
+ index = get_index((inst + 1), rex);
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[0] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
+
break;
case 0x8a:
/* mov m8 to r8 */
thread_inst->op_size = BYTE;
- index = get_index((inst + 1));
- thread_inst->operand[1] = mk_operand(BYTE, index, 0, REGISTER);
+ index = get_index((inst + 1), rex);
+ GET_OP_SIZE_FOR_BYTE(tmp_size);
+ thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER);
break;
case 0x8b:
/* mov r32/16 to m32/16 */
- index = get_index((inst + 1));
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[1] = mk_operand(WORD, index, 0, REGISTER);
- } else {
- thread_inst->op_size = LONG;
- thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
- }
+ index = get_index((inst + 1), rex);
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
break;
case 0x8c:
case 0x8e:
@@ -292,30 +397,25 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
case 0xa0:
/* mov byte to al */
thread_inst->op_size = BYTE;
- thread_inst->operand[1] = mk_operand(BYTE, 0, 0, REGISTER);
+ GET_OP_SIZE_FOR_BYTE(tmp_size);
+ thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER);
break;
case 0xa1:
/* mov word/doubleword to ax/eax */
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[1] = mk_operand(WORD, 0, 0, REGISTER);
- } else {
- thread_inst->op_size = LONG;
- thread_inst->operand[1] = mk_operand(LONG, 0, 0, REGISTER);
- }
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
+
break;
case 0xa2:
/* mov al to (seg:offset) */
thread_inst->op_size = BYTE;
- thread_inst->operand[0] = mk_operand(BYTE, 0, 0, REGISTER);
+ GET_OP_SIZE_FOR_BYTE(tmp_size);
+ thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER);
break;
case 0xa3:
/* mov ax/eax to (seg:offset) */
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[0] = mk_operand(WORD, 0, 0, REGISTER);
- } else {
- thread_inst->op_size = LONG;
- thread_inst->operand[0] = mk_operand(LONG, 0, 0, REGISTER);
- }
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
break;
case 0xa4:
/* movsb */
@@ -324,11 +424,8 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
return DECODE_success;
case 0xa5:
/* movsw/movsl */
- if (thread_inst->op_size == WORD) {
- } else {
- thread_inst->op_size = LONG;
- }
- strcpy((char *)thread_inst->i_name, "movs");
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ strcpy((char *)thread_inst->i_name, "movs");
return DECODE_success;
case 0xaa:
/* stosb */
@@ -353,14 +450,10 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
break;
case 0xc7:
/* mov imm16/32 to m16/32 */
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[0] = mk_operand(WORD, 0, 0, IMMEDIATE);
- } else {
- thread_inst->op_size = LONG;
- thread_inst->operand[0] = mk_operand(LONG, 0, 0, IMMEDIATE);
- }
- thread_inst->immediate = get_immediate(vm86,
- (inst+1), thread_inst->op_size);
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, IMMEDIATE);
+ thread_inst->immediate = get_immediate(vm86, (inst+1), thread_inst->op_size);
+
break;
case 0x0f:
break;
@@ -379,21 +472,23 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
/* movz */
case 0xb6:
- index = get_index((inst + 1));
- if (thread_inst->op_size == WORD) {
- thread_inst->operand[1] = mk_operand(WORD, index, 0, REGISTER);
- } else {
- thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
-
- }
+ index = get_index((inst + 1), rex);
+ GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
+ thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
thread_inst->op_size = BYTE;
strcpy((char *)thread_inst->i_name, "movzb");
return DECODE_success;
case 0xb7:
- thread_inst->op_size = WORD;
- index = get_index((inst + 1));
- thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+ index = get_index((inst + 1), rex);
+ if (rex & 0x8) {
+ thread_inst->op_size = LONG;
+ thread_inst->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
+ } else {
+ thread_inst->op_size = WORD;
+ thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+ }
+
strcpy((char *)thread_inst->i_name, "movzw");
return DECODE_success;
@@ -406,8 +501,7 @@ static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst
return DECODE_failure;
}
-int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
- int inst_len)
+int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
{
l1_pgentry_t gpte;
unsigned long mfn;
@@ -449,22 +543,6 @@ int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
return inst_len+remaining;
}
-static void init_instruction(struct instruction *mmio_inst)
-{
- memset(mmio_inst->i_name, '0', I_NAME_LEN);
- mmio_inst->op_size = 0;
- mmio_inst->offset = 0;
- mmio_inst->immediate = 0;
- mmio_inst->seg_sel = 0;
- mmio_inst->op_num = 0;
-
- mmio_inst->operand[0] = 0;
- mmio_inst->operand[1] = 0;
- mmio_inst->operand[2] = 0;
-
- mmio_inst->flags = 0;
-}
-
static int read_from_mmio(struct instruction *inst_p)
{
// Only for mov instruction now!!!
@@ -570,17 +648,10 @@ void handle_mmio(unsigned long va, unsigned long gpa)
domain_crash_synchronous();
}
-#if 0
- printk("handle_mmio: cs:eip 0x%lx:0x%lx(0x%lx): opcode",
- cs, eip, inst_addr, inst_len);
- for (ret = 0; ret < inst_len; ret++)
- printk(" %02x", inst[ret]);
- printk("\n");
-#endif
init_instruction(&mmio_inst);
- if (vmx_decode(check_prefix(inst, &mmio_inst), &mmio_inst) == DECODE_failure)
+ if (vmx_decode(inst, &mmio_inst) == DECODE_failure)
domain_crash_synchronous();
__vmwrite(GUEST_RIP, eip + inst_len);
@@ -654,8 +725,8 @@ void handle_mmio(unsigned long va, unsigned long gpa)
}
if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
- send_mmio_req(gpa, &mmio_inst,
- inst_decoder_regs->eax, IOREQ_WRITE, 0);
+ send_mmio_req(gpa, &mmio_inst,
+ inst_decoder_regs->eax, IOREQ_WRITE, 0);
}
domain_crash_synchronous();
diff --git a/xen/include/asm-x86/ldt.h b/xen/include/asm-x86/ldt.h
index 8288ffa118..107e67720b 100644
--- a/xen/include/asm-x86/ldt.h
+++ b/xen/include/asm-x86/ldt.h
@@ -18,9 +18,7 @@ static inline void load_LDT(struct vcpu *v)
{
cpu = smp_processor_id();
desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
- desc->a = ((LDT_VIRT_START(v)&0xffff)<<16) | (ents*8-1);
- desc->b = (LDT_VIRT_START(v)&(0xff<<24)) | 0x8200 |
- ((LDT_VIRT_START(v)&0xff0000)>>16);
+ _set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
__asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
}
}
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 29b9f518a8..7732648f37 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -6,7 +6,11 @@
* It is important that the masks are signed quantities. This ensures that
* the compiler sign-extends a 32-bit mask to 64 bits if that is required.
*/
+#ifndef __ASSEMBLY__
+#define PAGE_SIZE (1L << PAGE_SHIFT)
+#else
#define PAGE_SIZE (1 << PAGE_SHIFT)
+#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PAGE_FLAG_MASK (~0)
diff --git a/xen/include/asm-x86/vmx_platform.h b/xen/include/asm-x86/vmx_platform.h
index 2382ebbc7a..2b2682394c 100644
--- a/xen/include/asm-x86/vmx_platform.h
+++ b/xen/include/asm-x86/vmx_platform.h
@@ -40,6 +40,7 @@
#define WORD 2
#define LONG 4
#define QUAD 8
+#define BYTE_64 16
//For instruction.operand[].flag
#define REGISTER 0x1
diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h
index 1a11a3be86..44bc8dd46f 100644
--- a/xen/include/public/arch-x86_32.h
+++ b/xen/include/public/arch-x86_32.h
@@ -137,6 +137,8 @@ typedef struct vcpu_guest_context {
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
+ unsigned long cr0; /* CR0 */
+ unsigned long cr4; /* CR4 */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
unsigned long event_callback_cs; /* CS:EIP of event callback */
unsigned long event_callback_eip;