about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk> 2006-05-04 17:38:25 +0100
committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk> 2006-05-04 17:38:25 +0100
commit 44cf443c793f6ab6c26b920da590a178c65a2712 (patch)
tree 2248f6d0afdb26e8bf9c5835b402befc3c6dbfa4
parent f6238c87af2ea3b1307f426c7bf6b92925c7d9fd (diff)
download xen-44cf443c793f6ab6c26b920da590a178c65a2712.tar.gz
xen-44cf443c793f6ab6c26b920da590a178c65a2712.tar.bz2
xen-44cf443c793f6ab6c26b920da590a178c65a2712.zip
Linux: upgrade to 2.6.16.13.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
-rw-r--r-- buildconfigs/mk.linux-2.6-xen 2
-rw-r--r-- linux-2.6-xen-sparse/arch/i386/kernel/vm86.c 12
-rw-r--r-- linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile 6
-rw-r--r-- linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S 28
-rw-r--r-- linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c 4
-rw-r--r-- linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c 4
-rw-r--r-- linux-2.6-xen-sparse/drivers/char/tty_io.c 8
-rw-r--r-- linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h 3
-rw-r--r-- linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h 20
-rw-r--r-- linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h 13
-rw-r--r-- linux-2.6-xen-sparse/include/linux/mm.h 5
-rw-r--r-- linux-2.6-xen-sparse/mm/page_alloc.c 31
-rw-r--r-- linux-2.6-xen-sparse/net/core/dev.c 2
13 files changed, 83 insertions, 55 deletions
diff --git a/buildconfigs/mk.linux-2.6-xen b/buildconfigs/mk.linux-2.6-xen
index d784d175e0..04070337f1 100644
--- a/buildconfigs/mk.linux-2.6-xen
+++ b/buildconfigs/mk.linux-2.6-xen
@@ -1,5 +1,5 @@
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.16
+LINUX_VER = 2.6.16.13
EXTRAVERSION ?= xen
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
index da2d48e178..c1e240926d 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
@@ -43,6 +43,7 @@
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
+#include <linux/audit.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -258,6 +259,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss;
#endif
+ long eax;
/*
* make sure the vm86() system call doesn't try to do anything silly
*/
@@ -313,13 +315,19 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
tsk->thread.screen_bitmap = info->screen_bitmap;
if (info->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
+ __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
+ __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
+
+ /*call audit_syscall_exit since we do not exit via the normal paths */
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+
__asm__ __volatile__(
- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
"movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
"jmp resume_userspace"
: /* no outputs */
- :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+ :"r" (&info->regs), "r" (task_thread_info(tsk)));
/* we never return here */
}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
index 6dcd7cf5ac..a84151e4ca 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
@@ -28,11 +28,11 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
$(call if_changed,syscall)
-AFLAGS_vsyscall-sysenter.o = -m32 -Iarch/i386/kernel
-AFLAGS_vsyscall-syscall.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
ifdef CONFIG_XEN
-AFLAGS_vsyscall-int80.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
CFLAGS_syscall32-xen.o += -DUSE_INT80
AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
index 2930d58da2..05e7ce0b7e 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
@@ -221,6 +221,10 @@ rff_trace:
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with uncanonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
*/
ENTRY(system_call)
@@ -289,7 +293,10 @@ sysret_signal:
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
- jmp sysret_check
+ /* Use IRET because user could have changed frame. This
+ works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+ cli
+ jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -315,7 +322,8 @@ tracesys:
call syscall_trace_leave
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
- jmp ret_from_sys_call
+ /* Use IRET because user could have changed frame */
+ jmp int_ret_from_sys_call
CFI_ENDPROC
/*
@@ -449,25 +457,9 @@ ENTRY(stub_execve)
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
- GET_THREAD_INFO(%rcx)
- bt $TIF_IA32,threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
- jc exec_32bit
RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
-
-exec_32bit:
- CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
index c761d703ed..6e4e62e86c 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
@@ -484,6 +484,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This is basically '__unlazy_fpu', except that we queue a
* multicall to indicate FPU task switch, rather than
* synchronously trapping to Xen.
+ * This must be here to ensure both math_state_restore() and
+ * kernel_fpu_begin() work consistently.
+ * The AMD workaround requires it to be after DS reload, or
+ * after DS has been cleared, which we do in __prepare_arch_switch.
*/
if (prev_p->thread_info->status & TS_USEDFPU) {
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
index df9c1ddbf6..b6ed190a2d 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
@@ -1157,6 +1157,10 @@ static int __init init_amd(struct cpuinfo_x86 *c)
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ /* Enable workaround for FXSAVE leak */
+ if (c->x86 >= 6)
+ set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
r = get_model_name(c);
if (!r) {
switch (c->x86) {
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c
index 752834fd13..f6f0689771 100644
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -2708,7 +2708,11 @@ static void __do_SAK(void *arg)
}
task_lock(p);
if (p->files) {
- rcu_read_lock();
+ /*
+ * We don't take a ref to the file, so we must
+ * hold ->file_lock instead.
+ */
+ spin_lock(&p->files->file_lock);
fdt = files_fdtable(p->files);
for (i=0; i < fdt->max_fds; i++) {
filp = fcheck_files(p->files, i);
@@ -2723,7 +2727,7 @@ static void __do_SAK(void *arg)
break;
}
}
- rcu_read_unlock();
+ spin_unlock(&p->files->file_lock);
}
task_unlock(p);
} while_each_task_pid(session, PIDTYPE_SID, p);
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
index d883e06607..00fd80db97 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
@@ -33,6 +33,9 @@
#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+
#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
index 9b2d6c547f..528cc0478f 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
@@ -107,6 +107,26 @@ static inline void pud_clear (pud_t * pud) { }
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
pmd_index(address))
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ ptep->pte_low = 0;
+ smp_wmb();
+ ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+ u32 *tmp = (u32 *)pmd;
+ *tmp = 0;
+ smp_wmb();
+ *(tmp + 1) = 0;
+}
+
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t res;
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
index 224c1032c7..6bed6669c5 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
@@ -205,14 +205,12 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
#define pmd_none(x) (!(unsigned long)pmd_val(x))
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
@@ -272,16 +270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
pte_t pte;
if (full) {
pte = *ptep;
-#ifdef CONFIG_X86_PAE
- /* Cannot do this in a single step, as the compiler may
- issue the two stores in either order, but the hypervisor
- must not see the high part before the low one. */
- ptep->pte_low = 0;
- barrier();
- ptep->pte_high = 0;
-#else
- *ptep = __pte(0);
-#endif
+ pte_clear(mm, addr, ptep);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
diff --git a/linux-2.6-xen-sparse/include/linux/mm.h b/linux-2.6-xen-sparse/include/linux/mm.h
index 338f482358..778439edc7 100644
--- a/linux-2.6-xen-sparse/include/linux/mm.h
+++ b/linux-2.6-xen-sparse/include/linux/mm.h
@@ -232,10 +232,9 @@ struct page {
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
- * swp_entry_t if PageSwapCache.
- * When page is free, this
+ * swp_entry_t if PageSwapCache;
* indicates order in the buddy
- * system.
+ * system if PG_buddy is set.
*/
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c
index 3ef836209d..c0f3c60537 100644
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -153,7 +153,8 @@ static void bad_page(struct page *page)
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback );
+ 1 << PG_writeback |
+ 1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -224,12 +225,12 @@ static inline unsigned long page_order(struct page *page) {
static inline void set_page_order(struct page *page, int order) {
set_page_private(page, order);
- __SetPagePrivate(page);
+ __SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
- __ClearPagePrivate(page);
+ __ClearPageBuddy(page);
set_page_private(page, 0);
}
@@ -268,11 +269,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
*
+ * For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
@@ -281,10 +284,10 @@ static inline int page_is_buddy(struct page *page, int order)
return 0;
#endif
- if (PagePrivate(page) &&
- (page_order(page) == order) &&
- page_count(page) == 0)
+ if (PageBuddy(page) && page_order(page) == order) {
+ BUG_ON(page_count(page) != 0);
return 1;
+ }
return 0;
}
@@ -301,7 +304,7 @@ static inline int page_is_buddy(struct page *page, int order)
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
@@ -364,7 +367,8 @@ static inline int free_pages_check(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -523,7 +527,8 @@ static int prep_new_page(struct page *page, int order)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
/*
diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c
index 55870e1254..87c770eedc 100644
--- a/linux-2.6-xen-sparse/net/core/dev.c
+++ b/linux-2.6-xen-sparse/net/core/dev.c
@@ -2994,11 +2994,11 @@ void netdev_run_todo(void)
switch(dev->reg_state) {
case NETREG_REGISTERING:
+ dev->reg_state = NETREG_REGISTERED;
err = netdev_register_sysfs(dev);
if (err)
printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
dev->name, err);
- dev->reg_state = NETREG_REGISTERED;
break;
case NETREG_UNREGISTERING: