aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/domain_builder/dom_builder.c9
-rw-r--r--xen/arch/i386/boot/boot.S22
-rw-r--r--xen/arch/i386/entry.S28
-rw-r--r--xen/arch/i386/ioremap.c7
-rw-r--r--xen/arch/i386/mm.c189
-rw-r--r--xen/arch/i386/process.c14
-rw-r--r--xen/arch/i386/traps.c34
-rw-r--r--xen/common/domain.c19
-rw-r--r--xen/include/asm-i386/desc.h23
-rw-r--r--xen/include/asm-i386/processor.h11
-rw-r--r--xen/include/asm-i386/ptrace.h1
-rw-r--r--xen/include/hypervisor-ifs/hypervisor-if.h75
-rw-r--r--xen/include/xeno/config.h6
-rw-r--r--xen/include/xeno/sched.h22
-rw-r--r--xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c7
-rw-r--r--xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h9
-rw-r--r--xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h9
-rw-r--r--xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h10
18 files changed, 339 insertions, 156 deletions
diff --git a/tools/domain_builder/dom_builder.c b/tools/domain_builder/dom_builder.c
index a402aef99c..1370c96df5 100644
--- a/tools/domain_builder/dom_builder.c
+++ b/tools/domain_builder/dom_builder.c
@@ -23,8 +23,13 @@
#define GUEST_SIG "XenoGues"
#define SIG_LEN 8
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+/*
+ * NB. No ring-3 access in initial guestOS pagetables. Note that we allow
+ * ring-3 privileges in the page directories, so that the guestOS may later
+ * decide to share a 4MB region with applications.
+ */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
/* standardized error reporting function */
static void dberr(char *msg)
diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S
index 1ef335d030..a83bebfa77 100644
--- a/xen/arch/i386/boot/boot.S
+++ b/xen/arch/i386/boot/boot.S
@@ -208,28 +208,28 @@ SYMBOL_NAME(idt):
.word 0
gdt_descr:
- .word 256*8-1
+ .word (2*NR_CPUS+8)*8-1
SYMBOL_NAME(gdt):
.long SYMBOL_NAME(gdt_table) /* gdt base */
.word 0
nopaging_gdt_descr:
- .word 256*8-1
+ .word (2*NR_CPUS+8)*8-1
.long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
ALIGN
/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
/* machine->physical mapping table. Ring 0 can access all memory. */
ENTRY(gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* not used */
- .quad 0x00cfba000000c3ff /* 0x11 ring 1 3.95GB code at 0x0 */
- .quad 0x00cfb2000000c3ff /* 0x19 ring 1 3.95GB data at 0x0 */
- .quad 0x00cffa000000c3ff /* 0x23 ring 3 3.95GB code at 0x0 */
- .quad 0x00cff2000000c3ff /* 0x2b ring 3 3.95GB data at 0x0 */
- .quad 0x00cf9a000000ffff /* 0x30 ring 0 4.00GB code at 0x0 */
- .quad 0x00cf92000000ffff /* 0x38 ring 0 4.00GB data at 0x0 */
- .fill NR_CPUS,8,0 /* space for TSS's */
+ .quad 0x0000000000000000 /* 0x0000 NULL descriptor */
+ .quad 0x00cf9a000000ffff /* 0x0008 ring 0 4.00GB code at 0x0 */
+ .quad 0x00cf92000000ffff /* 0x0010 ring 0 4.00GB data at 0x0 */
+ .quad 0x00cfba000000c3ff /* 0x0019 ring 1 3.95GB code at 0x0 */
+ .quad 0x00cfb2000000c3ff /* 0x0021 ring 1 3.95GB data at 0x0 */
+ .quad 0x00cffa000000c3ff /* 0x002b ring 3 3.95GB code at 0x0 */
+ .quad 0x00cff2000000c3ff /* 0x0033 ring 3 3.95GB data at 0x0 */
+ .quad 0x0000000000000000 /* unused */
+ .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
# The following adds 12kB to the kernel file size.
.org 0x1000
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index a6fadb31e9..166ceeb862 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -36,10 +36,8 @@
* in that it means we don't have to do messy GDT/LDT lookups to find
* out which the privilege-level of the return code-selector. That code
* would just be a hassle to write, and would need to account for running
- * off the end of the GDT/LDT, for example. The event callback has quite
- * a constrained callback method: the guest OS provides a linear address
- * which we call back to using the hard-coded __GUEST_CS descriptor (which
- * is a ring 1 descriptor). For IDT callbacks, we check that the provided
+ * off the end of the GDT/LDT, for example. For all callbacks we check
+ * that the provided
* return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as
* don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
* It's up to the guest OS to ensure all returns via the IDT are to ring 1.
@@ -105,12 +103,14 @@ STATE = 4
HYP_EVENTS = 8
DOMAIN = 12
SHARED_INFO = 16
+EVENT_SEL = 20
+EVENT_ADDR = 24
+FAILSAFE_SEL = 28
+FAILSAFE_ADDR = 32
/* Offsets in shared_info_t */
EVENTS = 0
EVENTS_ENABLE = 4
-EVENT_ADDR = 8
-FAILSAFE_ADDR = 12
/* Offsets in guest_trap_bounce */
GTB_ERROR_CODE = 0
@@ -290,14 +290,14 @@ test_all_events:
/* Prevent unnecessary reentry of event callback (stack overflow!) */
xorl %ecx,%ecx
movl %ecx,EVENTS_ENABLE(%eax)
-/* %eax == shared_info, %ebx == task_struct */
-process_guest_events:
+/*process_guest_events:*/
mov PROCESSOR(%ebx),%edx
shl $4,%edx # sizeof(guest_trap_bounce) == 16
lea guest_trap_bounce(%edx),%edx
- movl EVENT_ADDR(%eax),%eax
+ movl EVENT_ADDR(%ebx),%eax
movl %eax,GTB_EIP(%edx)
- movw $__GUEST_CS,GTB_CS(%edx)
+ movl EVENT_SEL(%ebx),%eax
+ movw %ax,GTB_CS(%edx)
call create_bounce_frame
jmp restore_all
@@ -319,10 +319,10 @@ failsafe_callback:
mov PROCESSOR(%ebx),%eax
shl $4,%eax
lea guest_trap_bounce(%eax),%edx
- movl SHARED_INFO(%ebx),%eax
- movl FAILSAFE_ADDR(%eax),%eax
+ movl FAILSAFE_ADDR(%ebx),%eax
movl %eax,GTB_EIP(%edx)
- movw $__GUEST_CS,GTB_CS(%edx)
+ movl FAILSAFE_SEL(%ebx),%eax
+ movw %ax,GTB_CS(%edx)
call create_bounce_frame
subl $8,%esi # add DS/ES to failsafe stack frame
movl DS(%esp),%eax
@@ -590,7 +590,7 @@ ENTRY(hypervisor_call_table)
.long SYMBOL_NAME(do_console_write)
.long SYMBOL_NAME(do_set_gdt)
.long SYMBOL_NAME(do_stack_switch)
- .long SYMBOL_NAME(do_ldt_switch)
+ .long SYMBOL_NAME(do_set_callbacks)
.long SYMBOL_NAME(do_net_update)
.long SYMBOL_NAME(do_fpu_taskswitch)
.long SYMBOL_NAME(do_yield)
diff --git a/xen/arch/i386/ioremap.c b/xen/arch/i386/ioremap.c
index 8487c535fb..cd97e124d7 100644
--- a/xen/arch/i386/ioremap.c
+++ b/xen/arch/i386/ioremap.c
@@ -15,9 +15,6 @@
static unsigned long remap_base = 0;
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY)
-
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
static void new_l2e(l2_pgentry_t *pl2e)
@@ -25,7 +22,7 @@ static void new_l2e(l2_pgentry_t *pl2e)
l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
if ( !pl1e ) BUG();
clear_page(pl1e);
- *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT);
+ *pl2e = mk_l2_pgentry(__pa(pl1e)|__PAGE_HYPERVISOR);
}
@@ -89,7 +86,7 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
for ( ; ; )
{
if ( !l1_pgentry_empty(*pl1e) ) BUG();
- *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags);
+ *pl1e++ = mk_l1_pgentry((phys_addr+cur)|PAGE_HYPERVISOR|flags);
cur += PAGE_SIZE;
if ( cur == size ) break;
if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) )
diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c
index e330c092c6..c18d088cfd 100644
--- a/xen/arch/i386/mm.c
+++ b/xen/arch/i386/mm.c
@@ -5,6 +5,7 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
+#include <asm/domain_page.h>
static inline void set_pte_phys (unsigned long vaddr,
l1_pgentry_t entry)
@@ -114,31 +115,193 @@ long do_stack_switch(unsigned long ss, unsigned long esp)
}
-long do_ldt_switch(unsigned long ldts)
+/* Returns TRUE if given descriptor is valid for GDT or LDT. */
+static int check_descriptor(unsigned long a, unsigned long b)
{
- unsigned long *ptabent;
+ unsigned long base, limit;
- ptabent = (unsigned long *)GET_GDT_ADDRESS(current);
- /* Out of range for GDT table? */
- if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1;
- ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */
- /* Not an LDT entry? (S=0b, type =0010b) */
- if ( ldts && ((*ptabent & 0x00001f00) != 0x00000200) ) return -1;
- current->mm.ldt_sel = ldts;
- __load_LDT(ldts);
+ /* A not-present descriptor will always fault, so is safe. */
+ if ( !(a & _SEGMENT_P) )
+ goto good;
+ /*
+ * We don't allow a DPL of zero. There is no legitimate reason for
+ * specifying DPL==0, and it gets rather dangerous if we also accept call
+ * gates (consider a call gate pointing at another guestos descriptor with
+ * DPL 0 -- this would get the OS ring-0 privileges).
+ */
+ if ( (a & _SEGMENT_DPL) == 0 )
+ goto bad;
+
+ if ( !(a & _SEGMENT_S) )
+ {
+ /*
+ * System segment:
+ * 1. Don't allow interrupt or trap gates as they belong in the IDT.
+ * 2. Don't allow TSS descriptors or task gates as we don't
+ * virtualise x86 tasks.
+ * 3. Don't allow LDT descriptors because they're unnecessary and
+ * I'm uneasy about allowing an LDT page to contain LDT
+ * descriptors. In any case, Xen automatically creates the
+ * required descriptor when reloading the LDT register.
+ * 4. We allow call gates but they must not jump to a private segment.
+ */
+
+ /* Disallow everything but call gates. */
+ if ( (a & _SEGMENT_TYPE) != 0xc00 )
+ goto bad;
+
+ /* Can't allow far jump to a Xen-private segment. */
+ if ( !VALID_CODESEL(b>>16) )
+ goto bad;
+
+ /* Reserved bits must be zero. */
+ if ( (a & 0xe0) != 0 )
+ goto bad;
+
+ /* No base/limit check is needed for a call gate. */
+ goto good;
+ }
+
+ /* Check that base/limit do not overlap Xen-private space. */
+ base = (a&(0xff<<24)) | ((a&0xff)<<16) | (b>>16);
+ limit = (a&0xf0000) | (b&0xffff);
+ limit++; /* We add one because limit is inclusive. */
+ if ( (a & _SEGMENT_G) )
+ limit <<= 12;
+ if ( ((base + limit) <= base) ||
+ ((base + limit) >= PAGE_OFFSET) )
+ goto bad;
+
+ good:
+ return 1;
+ bad:
return 0;
}
-long do_set_gdt(unsigned long *frame_list, int entries)
+long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
- return -ENOSYS;
+ /* NB. There are 512 8-byte entries per GDT page. */
+ unsigned int i, nr_pages = (entries + 511) / 512;
+ unsigned long frames[16], pfn, *gdt_page, flags;
+ long ret = -EINVAL;
+ struct pfn_info *page;
+
+ if ( (entries < FIRST_DOMAIN_GDT_ENTRY) || (entries > 8192) )
+ return -EINVAL;
+
+ if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
+ return -EFAULT;
+
+ spin_lock_irqsave(&current->page_lock, flags);
+
+ /* Check the new GDT. */
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ if ( frames[i] >= max_page )
+ goto out;
+
+ page = frame_table + frames[i];
+ if ( (page->flags & PG_domain_mask) != current->domain )
+ goto out;
+
+ if ( (page->flags & PG_type_mask) != PGT_gdt_page )
+ {
+ if ( page->type_count != 0 )
+ goto out;
+
+            /* Check all potential GDT entries in the page. */
+            gdt_page = map_domain_mem(frames[i] << PAGE_SHIFT);
+            /* NB. check_descriptor() takes (high word, low word). Reuse
+             * 'pfn' as the entry index so the outer frame loop's 'i' is
+             * not clobbered, and unmap before bailing out. */
+            for ( pfn = 0; pfn < 512; pfn++ )
+                if ( !check_descriptor(gdt_page[pfn*2+1], gdt_page[pfn*2]) )
+                {
+                    unmap_domain_mem(gdt_page);
+                    goto out;
+                }
+            unmap_domain_mem(gdt_page);
+        }
+ }
+
+ /* Tear down the old GDT. */
+ for ( i = 0; i < 16; i++ )
+ {
+ pfn = l1_pgentry_to_pagenr(current->mm.perdomain_pt[i]);
+ current->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+ if ( pfn == 0 ) continue;
+ page = frame_table + pfn;
+ put_page_type(page);
+ put_page_tot(page);
+ }
+
+ /* Install the new GDT. */
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ current->mm.perdomain_pt[i] =
+ mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+
+ page = frame_table + frames[i];
+ page->flags &= ~PG_type_mask;
+ page->flags |= PGT_gdt_page;
+ get_page_type(page);
+ get_page_tot(page);
+ }
+
+ flush_tlb();
+
+ /* Copy over first entries of the new GDT. */
+ memcpy((void *)PERDOMAIN_VIRT_START, gdt_table, FIRST_DOMAIN_GDT_ENTRY*8);
+
+ SET_GDT_ADDRESS(current, PERDOMAIN_VIRT_START);
+ SET_GDT_ENTRIES(current, (entries*8)-1);
+ __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
+
+ ret = 0; /* success */
+
+ out:
+ spin_unlock_irqrestore(&current->page_lock, flags);
+ return ret;
}
long do_update_descriptor(
unsigned long pa, unsigned long word1, unsigned long word2)
{
- return -ENOSYS;
+ unsigned long *gdt_pent, flags, pfn = pa >> PAGE_SHIFT;
+ struct pfn_info *page;
+ long ret = -EINVAL;
+
+    /* NB. check_descriptor() takes (high word, low word): gdt_pent[0] = word1
+     * below makes word1 the low dword, so pass (word2, word1). */
+    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word2, word1) )
+        return -EINVAL;
+
+ spin_lock_irqsave(&current->page_lock, flags);
+
+ page = frame_table + pfn;
+ if ( (page->flags & PG_domain_mask) != current->domain )
+ goto out;
+
+ /* Check if the given frame is in use in an unsafe context. */
+ switch ( (page->flags & PG_type_mask) )
+ {
+ case PGT_gdt_page:
+ /* Disallow updates of Xen-private descriptors in the current GDT. */
+ if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
+ (((pa&(PAGE_SIZE-1))>>3) < FIRST_DOMAIN_GDT_ENTRY) )
+ goto out;
+ case PGT_ldt_page:
+ case PGT_writeable_page:
+ break;
+ default:
+ if ( page->type_count != 0 )
+ goto out;
+ }
+
+ /* All is good so make the update. */
+ gdt_pent = map_domain_mem(pa);
+ gdt_pent[0] = word1;
+ gdt_pent[1] = word2;
+ unmap_domain_mem(gdt_pent);
+
+ ret = 0; /* success */
+
+ out:
+ spin_unlock_irqrestore(&current->page_lock, flags);
+ return ret;
}
diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c
index c9736a2093..05a475e11d 100644
--- a/xen/arch/i386/process.c
+++ b/xen/arch/i386/process.c
@@ -312,15 +312,15 @@ void new_thread(struct task_struct *p,
/*
* Initial register values:
- * DS,ES,FS,GS = __GUEST_DS
- * CS:EIP = __GUEST_CS:start_pc
- * SS:ESP = __GUEST_DS:start_stack
+ * DS,ES,FS,GS = FLAT_RING1_DS
+ * CS:EIP = FLAT_RING1_CS:start_pc
+ * SS:ESP = FLAT_RING1_DS:start_stack
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
- p->thread.fs = p->thread.gs = __GUEST_DS;
- regs->xds = regs->xes = regs->xss = __GUEST_DS;
- regs->xcs = __GUEST_CS;
+ p->thread.fs = p->thread.gs = FLAT_RING1_DS;
+ regs->xds = regs->xes = regs->xss = FLAT_RING1_DS;
+ regs->xcs = FLAT_RING1_CS;
regs->eip = start_pc;
regs->esp = start_stack;
regs->esi = start_info;
@@ -395,7 +395,7 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Switch GDT and LDT. */
__asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
- __load_LDT(next_p->mm.ldt_sel);
+// __load_LDT(0);
/*
* Restore %fs and %gs.
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index 5fe0858ba3..f0b15e081f 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -325,6 +325,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
return;
gp_in_kernel:
+
if ( (fixup = search_exception_table(regs->eip)) != 0 )
{
regs->eip = fixup;
@@ -568,23 +569,38 @@ long do_set_trap_table(trap_info_t *traps)
trap_info_t cur;
trap_info_t *dst = current->thread.traps;
- /*
- * I'm removing the next line, since it seems more intuitive to use this
- * as an interface to incrementally update a domain's trap table. Clearing
- * out old entries automatically is rather antisocial!
- */
- /*memset(dst, 0, sizeof(*dst) * 256);*/
-
for ( ; ; )
{
if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
- if ( (cur.cs & 3) == 0 ) return -EPERM;
+
if ( cur.address == 0 ) break;
+
+ if ( !VALID_CODESEL(cur.cs) ) return -EPERM;
+
memcpy(dst+cur.vector, &cur, sizeof(cur));
traps++;
}
- return(0);
+ return 0;
+}
+
+
+long do_set_callbacks(unsigned long event_selector,
+ unsigned long event_address,
+ unsigned long failsafe_selector,
+ unsigned long failsafe_address)
+{
+ struct task_struct *p = current;
+
+ if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
+ return -EPERM;
+
+ p->event_selector = event_selector;
+ p->event_address = event_address;
+ p->failsafe_selector = failsafe_selector;
+ p->failsafe_address = failsafe_address;
+
+ return 0;
}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 2102e29ee3..da62effffd 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -16,8 +16,13 @@
#include <asm/msr.h>
#include <xeno/blkdev.h>
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+/*
+ * NB. No ring-3 access in initial guestOS pagetables. Note that we allow
+ * ring-3 privileges in the page directories, so that the guestOS may later
+ * decide to share a 4MB region with applications.
+ */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
@@ -47,6 +52,9 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
memset(p->shared_info, 0, PAGE_SIZE);
SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
+ p->mm.perdomain_pt = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
+ memset(p->mm.perdomain_pt, 0, PAGE_SIZE);
+
init_blkdev_info(p);
SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
@@ -224,7 +232,8 @@ void release_task(struct task_struct *p)
{
destroy_net_vif(p);
}
- if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
+
+ free_page((unsigned long)p->mm.perdomain_pt);
destroy_blkdev_info(p);
@@ -268,7 +277,7 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
net_ring_t *net_ring;
net_vif_t *net_vif;
- /* entries 0xe0000000 onwards in page table must contain hypervisor
+ /* High entries in page table must contain hypervisor
* mem mappings - set them up.
*/
phys_l2tab = meminfo->l2_pgt_addr;
@@ -279,7 +288,7 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
(ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
* sizeof(l2_pgentry_t));
l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(p->mm.perdomain_pt) | PAGE_HYPERVISOR);
+ mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
p->mm.pagetable = mk_pagetable(phys_l2tab);
unmap_domain_mem(l2tab);
diff --git a/xen/include/asm-i386/desc.h b/xen/include/asm-i386/desc.h
index 2cb90769b5..f1d11e33f7 100644
--- a/xen/include/asm-i386/desc.h
+++ b/xen/include/asm-i386/desc.h
@@ -2,7 +2,24 @@
#define __ARCH_DESC_H
#define __FIRST_TSS_ENTRY 8
-#define __TSS(n) ((n) + __FIRST_TSS_ENTRY)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1)
+
+#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+#define load_TR(n) __asm__ __volatile__ ( "ltr %%ax" : : "a" (__TSS(n)<<3) )
+#define __load_LDT(n) __asm__ __volatile__ ( "lldt %%ax" : : "a" (n) )
+
+/* Guest OS must provide its own code selectors, or use the one we provide. */
+#define VALID_CODESEL(_s) \
+ ((((_s)>>2) >= FIRST_DOMAIN_GDT_ENTRY) || ((_s) == FLAT_RING1_CS))
+
+/* These are bitmasks for the first 32 bits of a descriptor table entry. */
+#define _SEGMENT_TYPE (15<< 8)
+#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
+#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
+#define _SEGMENT_P ( 1<<15) /* Segment Present */
+#define _SEGMENT_G ( 1<<23) /* Granularity */
#ifndef __ASSEMBLY__
struct desc_struct {
@@ -20,10 +37,6 @@ struct Xgt_desc_struct {
#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
-#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3))
-
-#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" ((n)<<3))
-
extern void set_intr_gate(unsigned int irq, void * addr);
extern void set_tss_desc(unsigned int n, void *addr);
diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h
index e5d2e420ac..a46e61f048 100644
--- a/xen/include/asm-i386/processor.h
+++ b/xen/include/asm-i386/processor.h
@@ -401,17 +401,6 @@ extern struct desc_struct *idt_tables[];
{~0, } /* ioperm */ \
}
-#define start_thread(regs, new_eip, new_esp) do { \
- __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
- set_fs(USER_DS); \
- regs->xds = __USER_DS; \
- regs->xes = __USER_DS; \
- regs->xss = __USER_DS; \
- regs->xcs = __USER_CS; \
- regs->eip = new_eip; \
- regs->esp = new_esp; \
-} while (0)
-
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h
index 509001cf57..540a3b372a 100644
--- a/xen/include/asm-i386/ptrace.h
+++ b/xen/include/asm-i386/ptrace.h
@@ -79,7 +79,6 @@ enum EFLAGS {
#ifdef __KERNEL__
#define user_mode(regs) ((3 & (regs)->xcs))
-#define instruction_pointer(regs) ((regs)->eip)
extern void show_regs(struct pt_regs *);
#endif
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 797605e9c1..5d23765aca 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -10,13 +10,17 @@
/*
* SEGMENT DESCRIPTOR TABLES
*/
-/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */
+/* The first few GDT entries are reserved by Xen. */
#define FIRST_DOMAIN_GDT_ENTRY 40
-/* These are flat segments for domain bootstrap and fallback. */
-#define FLAT_RING1_CS 0x11
-#define FLAT_RING1_DS 0x19
-#define FLAT_RING3_CS 0x23
-#define FLAT_RING3_DS 0x2b
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since
+ * these are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0x0019
+#define FLAT_RING1_DS 0x0021
+#define FLAT_RING3_CS 0x002b
+#define FLAT_RING3_DS 0x0033
/*
@@ -29,7 +33,7 @@
#define __HYPERVISOR_console_write 2
#define __HYPERVISOR_set_gdt 3
#define __HYPERVISOR_stack_switch 4
-#define __HYPERVISOR_ldt_switch 5
+#define __HYPERVISOR_set_callbacks 5
#define __HYPERVISOR_net_update 6
#define __HYPERVISOR_fpu_taskswitch 7
#define __HYPERVISOR_yield 8
@@ -97,28 +101,32 @@
/*
* PAGE UPDATE COMMANDS AND FLAGS
*
- * PGREQ_XXX: specified in least-significant bits of 'ptr' field.
- * All requests specify relevent PTE or PT address in 'ptr'.
+ * PGREQ_XXX: specified in least 2 bits of 'ptr' field. These bits are masked
+ * off to get the real 'ptr' value.
+ * All requests specify relevent machine address in 'ptr'.
* Normal requests specify update value in 'value'.
- * Extended requests specify command in least 8 bits of 'value'.
+ * Extended requests specify command in least 8 bits of 'value'. These bits
+ * are masked off to get the real 'val' value. Except for PGEXT_SET_LDT
+ * which shifts the least bits out.
*/
/* A normal page-table update request. */
-#define PGREQ_NORMAL 0
+#define PGREQ_NORMAL 0 /* does a checked form of '*ptr = val' */
/* Update an entry in the machine->physical mapping table. */
-#define PGREQ_MPT_UPDATE 1
+#define PGREQ_MPT_UPDATE 1 /* ptr = frame to modify table entry for */
/* An extended command. */
-#define PGREQ_EXTENDED_COMMAND 2
+#define PGREQ_EXTENDED_COMMAND 2 /* least 8 bits of val demux further */
/* DOM0 can make entirely unchecked updates which do not affect refcnts. */
-#define PGREQ_UNCHECKED_UPDATE 3
-/* Announce a new top-level page table. */
-#define PGEXT_PIN_L1_TABLE 0
-#define PGEXT_PIN_L2_TABLE 1
-#define PGEXT_PIN_L3_TABLE 2
-#define PGEXT_PIN_L4_TABLE 3
-#define PGEXT_UNPIN_TABLE 4
-#define PGEXT_NEW_BASEPTR 5
-#define PGEXT_TLB_FLUSH 6
-#define PGEXT_INVLPG 7
+#define PGREQ_UNCHECKED_UPDATE 3 /* does an unchecked '*ptr = val' */
+/* Extended commands: */
+#define PGEXT_PIN_L1_TABLE 0 /* ptr = frame to pin */
+#define PGEXT_PIN_L2_TABLE 1 /* ptr = frame to pin */
+#define PGEXT_PIN_L3_TABLE 2 /* ptr = frame to pin */
+#define PGEXT_PIN_L4_TABLE 3 /* ptr = frame to pin */
+#define PGEXT_UNPIN_TABLE 4 /* ptr = frame to unpin */
+#define PGEXT_NEW_BASEPTR 5 /* ptr = new pagetable base to install */
+#define PGEXT_TLB_FLUSH 6 /* ptr = NULL */
+#define PGEXT_INVLPG 7 /* ptr = NULL ; val = page to invalidate */
+#define PGEXT_SET_LDT 8 /* ptr = linear address; val = # entries */
#define PGEXT_CMD_MASK 255
#define PGEXT_CMD_SHIFT 8
@@ -173,27 +181,6 @@ typedef struct shared_info_st {
unsigned long events_enable;
/*
- * Address for callbacks hypervisor -> guest OS.
- * Stack frame looks like that of an interrupt.
- * Code segment is the default flat selector.
- * This handler will only be called when events_enable is non-zero.
- */
- unsigned long event_address;
-
- /*
- * Hypervisor uses this callback when it takes a fault on behalf of
- * an application. This can happen when returning from interrupts for
- * example: various faults can occur when reloading the segment
- * registers, and executing 'iret'.
- * This callback is provided with an extended stack frame, augmented
- * with saved values for segment registers %ds and %es:
- * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
- * Code segment is the default flat selector.
- * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!!
- */
- unsigned long failsafe_address;
-
- /*
* Time: The following abstractions are exposed: System Time, Clock Time,
* Domain Virtual Time. Domains can access Cycle counter time directly.
* XXX RN: Need something to pass NTP scaling to GuestOS.
diff --git a/xen/include/xeno/config.h b/xen/include/xeno/config.h
index 4067f52dc7..ec92fa031c 100644
--- a/xen/include/xeno/config.h
+++ b/xen/include/xeno/config.h
@@ -120,10 +120,8 @@
#define barrier() __asm__ __volatile__("": : :"memory")
-#define __HYPERVISOR_CS 0x30
-#define __HYPERVISOR_DS 0x38
-#define __GUEST_CS 0x11
-#define __GUEST_DS 0x19
+#define __HYPERVISOR_CS 0x0008
+#define __HYPERVISOR_DS 0x0010
#define NR_syscalls 256
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 147f3c40fd..f67e20983f 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -27,10 +27,10 @@ struct mm_struct {
* Every domain has a L1 pagetable of its own. Per-domain mappings
* are put in this table (eg. the current GDT is mapped here).
*/
- l2_pgentry_t *perdomain_pt;
+ l1_pgentry_t *perdomain_pt;
pagetable_t pagetable;
- /* Current LDT selector. */
- unsigned int ldt_sel;
+ /* Current LDT descriptor. */
+ unsigned long ldt[2];
/* Next entry is passed to LGDT on domain switch. */
char gdt[6];
};
@@ -65,18 +65,30 @@ struct task_struct {
/*
* DO NOT CHANGE THE ORDER OF THE FOLLOWING.
- * There offsets are hardcoded in entry.S
+ * Their offsets are hardcoded in entry.S
*/
int processor; /* 00: current processor */
int state; /* 04: current run state */
- int hyp_events; /* 08: pending events */
+ int hyp_events; /* 08: pending intra-Xen events */
unsigned int domain; /* 12: domain id */
/* An unsafe pointer into a shared data area. */
shared_info_t *shared_info; /* 16: shared data area */
/*
+ * Return vectors pushed to us by guest OS.
+ * The stack frame for events is exactly that of an x86 hardware interrupt.
+ * The stack frame for a failsafe callback is augmented with saved values
+ * for segment registers %ds and %es:
+ * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+ */
+ unsigned long event_selector; /* 20: entry CS */
+ unsigned long event_address; /* 24: entry EIP */
+ unsigned long failsafe_selector; /* 28: entry CS */
+ unsigned long failsafe_address; /* 32: entry EIP */
+
+ /*
* From here on things can be added and shuffled without special attention
*/
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c
index 00c68a836f..6ac4ff242e 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c
@@ -153,10 +153,9 @@ void __init setup_arch(char **cmdline_p)
extern unsigned long cpu0_pte_quicklist[];
extern unsigned long cpu0_pgd_quicklist[];
- HYPERVISOR_shared_info->event_address =
- (unsigned long)hypervisor_callback;
- HYPERVISOR_shared_info->failsafe_address =
- (unsigned long)failsafe_callback;
+ HYPERVISOR_set_callbacks(
+ __KERNEL_CS, (unsigned long)hypervisor_callback,
+ __KERNEL_CS, (unsigned long)failsafe_callback);
boot_cpu_data.pgd_quick = cpu0_pgd_quicklist;
boot_cpu_data.pte_quick = cpu0_pte_quicklist;
diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h
index 4b9591102c..35de4c20eb 100644
--- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h
+++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h
@@ -195,13 +195,16 @@ static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
return ret;
}
-static inline int HYPERVISOR_ldt_switch(unsigned long ldts)
+static inline int HYPERVISOR_set_callbacks(
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
{
int ret;
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_ldt_switch),
- "b" (ldts) : "memory" );
+ : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
+ "b" (event_selector), "c" (event_address),
+ "d" (failsafe_selector), "S" (failsafe_address) : "memory" );
return ret;
}
diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h
index aad36820b7..d15646fcb5 100644
--- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h
+++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h
@@ -116,12 +116,6 @@ static inline pmd_t __pmd(unsigned long x)
#define __PAGE_OFFSET (0xC0000000)
-/*
- * This much address space is reserved for vmalloc() and iomap()
- * as well as fixmap mappings.
- */
-#define __VMALLOC_RESERVE (128 << 20)
-
#ifndef __ASSEMBLY__
/*
@@ -162,9 +156,6 @@ static __inline__ int get_order(unsigned long size)
#endif /* __ASSEMBLY__ */
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
-#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
-#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h
index 5623211570..35862eb1f2 100644
--- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h
+++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h
@@ -1,10 +1,12 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x11
-#define __KERNEL_DS 0x19
+#include <asm/hypervisor-ifs/hypervisor-if.h>
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define __KERNEL_CS FLAT_RING1_CS
+#define __KERNEL_DS FLAT_RING1_DS
+
+#define __USER_CS FLAT_RING3_CS
+#define __USER_DS FLAT_RING3_DS
#endif