diff options
-rw-r--r-- | tools/domain_builder/dom_builder.c | 9 | ||||
-rw-r--r-- | xen/arch/i386/boot/boot.S | 22 | ||||
-rw-r--r-- | xen/arch/i386/entry.S | 28 | ||||
-rw-r--r-- | xen/arch/i386/ioremap.c | 7 | ||||
-rw-r--r-- | xen/arch/i386/mm.c | 189 | ||||
-rw-r--r-- | xen/arch/i386/process.c | 14 | ||||
-rw-r--r-- | xen/arch/i386/traps.c | 34 | ||||
-rw-r--r-- | xen/common/domain.c | 19 | ||||
-rw-r--r-- | xen/include/asm-i386/desc.h | 23 | ||||
-rw-r--r-- | xen/include/asm-i386/processor.h | 11 | ||||
-rw-r--r-- | xen/include/asm-i386/ptrace.h | 1 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/hypervisor-if.h | 75 | ||||
-rw-r--r-- | xen/include/xeno/config.h | 6 | ||||
-rw-r--r-- | xen/include/xeno/sched.h | 22 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c | 7 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h | 9 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h | 9 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h | 10 |
18 files changed, 339 insertions, 156 deletions
diff --git a/tools/domain_builder/dom_builder.c b/tools/domain_builder/dom_builder.c index a402aef99c..1370c96df5 100644 --- a/tools/domain_builder/dom_builder.c +++ b/tools/domain_builder/dom_builder.c @@ -23,8 +23,13 @@ #define GUEST_SIG "XenoGues" #define SIG_LEN 8 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) +/* + * NB. No ring-3 access in initial guestOS pagetables. Note that we allow + * ring-3 privileges in the page directories, so that the guestOS may later + * decide to share a 4MB region with applications. + */ +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) /* standardized error reporting function */ static void dberr(char *msg) diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S index 1ef335d030..a83bebfa77 100644 --- a/xen/arch/i386/boot/boot.S +++ b/xen/arch/i386/boot/boot.S @@ -208,28 +208,28 @@ SYMBOL_NAME(idt): .word 0 gdt_descr: - .word 256*8-1 + .word (2*NR_CPUS+8)*8-1 SYMBOL_NAME(gdt): .long SYMBOL_NAME(gdt_table) /* gdt base */ .word 0 nopaging_gdt_descr: - .word 256*8-1 + .word (2*NR_CPUS+8)*8-1 .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET ALIGN /* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */ /* machine->physical mapping table. Ring 0 can access all memory. 
*/ ENTRY(gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* not used */ - .quad 0x00cfba000000c3ff /* 0x11 ring 1 3.95GB code at 0x0 */ - .quad 0x00cfb2000000c3ff /* 0x19 ring 1 3.95GB data at 0x0 */ - .quad 0x00cffa000000c3ff /* 0x23 ring 3 3.95GB code at 0x0 */ - .quad 0x00cff2000000c3ff /* 0x2b ring 3 3.95GB data at 0x0 */ - .quad 0x00cf9a000000ffff /* 0x30 ring 0 4.00GB code at 0x0 */ - .quad 0x00cf92000000ffff /* 0x38 ring 0 4.00GB data at 0x0 */ - .fill NR_CPUS,8,0 /* space for TSS's */ + .quad 0x0000000000000000 /* 0x0000 NULL descriptor */ + .quad 0x00cf9a000000ffff /* 0x0008 ring 0 4.00GB code at 0x0 */ + .quad 0x00cf92000000ffff /* 0x0010 ring 0 4.00GB data at 0x0 */ + .quad 0x00cfba000000c3ff /* 0x0019 ring 1 3.95GB code at 0x0 */ + .quad 0x00cfb2000000c3ff /* 0x0021 ring 1 3.95GB data at 0x0 */ + .quad 0x00cffa000000c3ff /* 0x002b ring 3 3.95GB code at 0x0 */ + .quad 0x00cff2000000c3ff /* 0x0033 ring 3 3.95GB data at 0x0 */ + .quad 0x0000000000000000 /* unused */ + .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ # The following adds 12kB to the kernel file size. .org 0x1000 diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index a6fadb31e9..166ceeb862 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -36,10 +36,8 @@ * in that it means we don't have to do messy GDT/LDT lookups to find * out which the privilege-level of the return code-selector. That code * would just be a hassle to write, and would need to account for running - * off the end of the GDT/LDT, for example. The event callback has quite - * a constrained callback method: the guest OS provides a linear address - * which we call back to using the hard-coded __GUEST_CS descriptor (which - * is a ring 1 descriptor). For IDT callbacks, we check that the provided + * off the end of the GDT/LDT, for example. For all callbacks we check + * that the provided * return CS is not == __HYPERVISOR_{CS,DS}. 
Apart from that we're safe as * don't allow a guest OS to install ring-0 privileges into the GDT/LDT. * It's up to the guest OS to ensure all returns via the IDT are to ring 1. @@ -105,12 +103,14 @@ STATE = 4 HYP_EVENTS = 8 DOMAIN = 12 SHARED_INFO = 16 +EVENT_SEL = 20 +EVENT_ADDR = 24 +FAILSAFE_SEL = 28 +FAILSAFE_ADDR = 32 /* Offsets in shared_info_t */ EVENTS = 0 EVENTS_ENABLE = 4 -EVENT_ADDR = 8 -FAILSAFE_ADDR = 12 /* Offsets in guest_trap_bounce */ GTB_ERROR_CODE = 0 @@ -290,14 +290,14 @@ test_all_events: /* Prevent unnecessary reentry of event callback (stack overflow!) */ xorl %ecx,%ecx movl %ecx,EVENTS_ENABLE(%eax) -/* %eax == shared_info, %ebx == task_struct */ -process_guest_events: +/*process_guest_events:*/ mov PROCESSOR(%ebx),%edx shl $4,%edx # sizeof(guest_trap_bounce) == 16 lea guest_trap_bounce(%edx),%edx - movl EVENT_ADDR(%eax),%eax + movl EVENT_ADDR(%ebx),%eax movl %eax,GTB_EIP(%edx) - movw $__GUEST_CS,GTB_CS(%edx) + movl EVENT_SEL(%ebx),%eax + movw %ax,GTB_CS(%edx) call create_bounce_frame jmp restore_all @@ -319,10 +319,10 @@ failsafe_callback: mov PROCESSOR(%ebx),%eax shl $4,%eax lea guest_trap_bounce(%eax),%edx - movl SHARED_INFO(%ebx),%eax - movl FAILSAFE_ADDR(%eax),%eax + movl FAILSAFE_ADDR(%ebx),%eax movl %eax,GTB_EIP(%edx) - movw $__GUEST_CS,GTB_CS(%edx) + movl FAILSAFE_SEL(%ebx),%eax + movw %ax,GTB_CS(%edx) call create_bounce_frame subl $8,%esi # add DS/ES to failsafe stack frame movl DS(%esp),%eax @@ -590,7 +590,7 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(do_console_write) .long SYMBOL_NAME(do_set_gdt) .long SYMBOL_NAME(do_stack_switch) - .long SYMBOL_NAME(do_ldt_switch) + .long SYMBOL_NAME(do_set_callbacks) .long SYMBOL_NAME(do_net_update) .long SYMBOL_NAME(do_fpu_taskswitch) .long SYMBOL_NAME(do_yield) diff --git a/xen/arch/i386/ioremap.c b/xen/arch/i386/ioremap.c index 8487c535fb..cd97e124d7 100644 --- a/xen/arch/i386/ioremap.c +++ b/xen/arch/i386/ioremap.c @@ -15,9 +15,6 @@ static unsigned long remap_base = 0; -#define L1_PROT 
(_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY) - #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) static void new_l2e(l2_pgentry_t *pl2e) @@ -25,7 +22,7 @@ static void new_l2e(l2_pgentry_t *pl2e) l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL); if ( !pl1e ) BUG(); clear_page(pl1e); - *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT); + *pl2e = mk_l2_pgentry(__pa(pl1e)|__PAGE_HYPERVISOR); } @@ -89,7 +86,7 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag for ( ; ; ) { if ( !l1_pgentry_empty(*pl1e) ) BUG(); - *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags); + *pl1e++ = mk_l1_pgentry((phys_addr+cur)|PAGE_HYPERVISOR|flags); cur += PAGE_SIZE; if ( cur == size ) break; if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) ) diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c index e330c092c6..c18d088cfd 100644 --- a/xen/arch/i386/mm.c +++ b/xen/arch/i386/mm.c @@ -5,6 +5,7 @@ #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> +#include <asm/domain_page.h> static inline void set_pte_phys (unsigned long vaddr, l1_pgentry_t entry) @@ -114,31 +115,193 @@ long do_stack_switch(unsigned long ss, unsigned long esp) } -long do_ldt_switch(unsigned long ldts) +/* Returns TRUE if given descriptor is valid for GDT or LDT. */ +static int check_descriptor(unsigned long a, unsigned long b) { - unsigned long *ptabent; + unsigned long base, limit; - ptabent = (unsigned long *)GET_GDT_ADDRESS(current); - /* Out of range for GDT table? */ - if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1; - ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */ - /* Not an LDT entry? (S=0b, type =0010b) */ - if ( ldts && ((*ptabent & 0x00001f00) != 0x00000200) ) return -1; - current->mm.ldt_sel = ldts; - __load_LDT(ldts); + /* A not-present descriptor will always fault, so is safe. */ + if ( !(a & _SEGMENT_P) ) + goto good; + /* + * We don't allow a DPL of zero. 
There is no legitimate reason for + * specifying DPL==0, and it gets rather dangerous if we also accept call + * gates (consider a call gate pointing at another guestos descriptor with + * DPL 0 -- this would get the OS ring-0 privileges). + */ + if ( (a & _SEGMENT_DPL) == 0 ) + goto bad; + + if ( !(a & _SEGMENT_S) ) + { + /* + * System segment: + * 1. Don't allow interrupt or trap gates as they belong in the IDT. + * 2. Don't allow TSS descriptors or task gates as we don't + * virtualise x86 tasks. + * 3. Don't allow LDT descriptors because they're unnecessary and + * I'm uneasy about allowing an LDT page to contain LDT + * descriptors. In any case, Xen automatically creates the + * required descriptor when reloading the LDT register. + * 4. We allow call gates but they must not jump to a private segment. + */ + + /* Disallow everything but call gates. */ + if ( (a & _SEGMENT_TYPE) != 0xc00 ) + goto bad; + + /* Can't allow far jump to a Xen-private segment. */ + if ( !VALID_CODESEL(b>>16) ) + goto bad; + + /* Reserved bits must be zero. */ + if ( (a & 0xe0) != 0 ) + goto bad; + + /* No base/limit check is needed for a call gate. */ + goto good; + } + + /* Check that base/limit do not overlap Xen-private space. */ + base = (a&(0xff<<24)) | ((a&0xff)<<16) | (b>>16); + limit = (a&0xf0000) | (b&0xffff); + limit++; /* We add one because limit is inclusive. */ + if ( (a & _SEGMENT_G) ) + limit <<= 12; + if ( ((base + limit) <= base) || + ((base + limit) >= PAGE_OFFSET) ) + goto bad; + + good: + return 1; + bad: return 0; } -long do_set_gdt(unsigned long *frame_list, int entries) +long do_set_gdt(unsigned long *frame_list, unsigned int entries) { - return -ENOSYS; + /* NB. There are 512 8-byte entries per GDT page. 
*/ + unsigned int i, nr_pages = (entries + 511) / 512; + unsigned long frames[16], pfn, *gdt_page, flags; + long ret = -EINVAL; + struct pfn_info *page; + + if ( (entries < FIRST_DOMAIN_GDT_ENTRY) || (entries > 8192) ) + return -EINVAL; + + if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) + return -EFAULT; + + spin_lock_irqsave(&current->page_lock, flags); + + /* Check the new GDT. */ + for ( i = 0; i < nr_pages; i++ ) + { + if ( frames[i] >= max_page ) + goto out; + + page = frame_table + frames[i]; + if ( (page->flags & PG_domain_mask) != current->domain ) + goto out; + + if ( (page->flags & PG_type_mask) != PGT_gdt_page ) + { + if ( page->type_count != 0 ) + goto out; + + /* Check all potential GDT entries in the page. */ + gdt_page = map_domain_mem(frames[0] << PAGE_SHIFT); + for ( i = 0; i < 512; i++ ) + if ( !check_descriptor(gdt_page[i*2], gdt_page[i*2]+1) ) + goto out; + unmap_domain_mem(gdt_page); + } + } + + /* Tear down the old GDT. */ + for ( i = 0; i < 16; i++ ) + { + pfn = l1_pgentry_to_pagenr(current->mm.perdomain_pt[i]); + current->mm.perdomain_pt[i] = mk_l1_pgentry(0); + if ( pfn == 0 ) continue; + page = frame_table + pfn; + put_page_type(page); + put_page_tot(page); + } + + /* Install the new GDT. */ + for ( i = 0; i < nr_pages; i++ ) + { + current->mm.perdomain_pt[i] = + mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR); + + page = frame_table + frames[i]; + page->flags &= ~PG_type_mask; + page->flags |= PGT_gdt_page; + get_page_type(page); + get_page_tot(page); + } + + flush_tlb(); + + /* Copy over first entries of the new GDT. 
*/ + memcpy((void *)PERDOMAIN_VIRT_START, gdt_table, FIRST_DOMAIN_GDT_ENTRY*8); + + SET_GDT_ADDRESS(current, PERDOMAIN_VIRT_START); + SET_GDT_ENTRIES(current, (entries*8)-1); + __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt)); + + ret = 0; /* success */ + + out: + spin_unlock_irqrestore(&current->page_lock, flags); + return ret; } long do_update_descriptor( unsigned long pa, unsigned long word1, unsigned long word2) { - return -ENOSYS; + unsigned long *gdt_pent, flags, pfn = pa >> PAGE_SHIFT; + struct pfn_info *page; + long ret = -EINVAL; + + if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) ) + return -EINVAL; + + spin_lock_irqsave(&current->page_lock, flags); + + page = frame_table + pfn; + if ( (page->flags & PG_domain_mask) != current->domain ) + goto out; + + /* Check if the given frame is in use in an unsafe context. */ + switch ( (page->flags & PG_type_mask) ) + { + case PGT_gdt_page: + /* Disallow updates of Xen-private descriptors in the current GDT. */ + if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) && + (((pa&(PAGE_SIZE-1))>>3) < FIRST_DOMAIN_GDT_ENTRY) ) + goto out; + case PGT_ldt_page: + case PGT_writeable_page: + break; + default: + if ( page->type_count != 0 ) + goto out; + } + + /* All is good so make the update. 
*/ + gdt_pent = map_domain_mem(pa); + gdt_pent[0] = word1; + gdt_pent[1] = word2; + unmap_domain_mem(gdt_pent); + + ret = 0; /* success */ + + out: + spin_unlock_irqrestore(&current->page_lock, flags); + return ret; } diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c index c9736a2093..05a475e11d 100644 --- a/xen/arch/i386/process.c +++ b/xen/arch/i386/process.c @@ -312,15 +312,15 @@ void new_thread(struct task_struct *p, /* * Initial register values: - * DS,ES,FS,GS = __GUEST_DS - * CS:EIP = __GUEST_CS:start_pc - * SS:ESP = __GUEST_DS:start_stack + * DS,ES,FS,GS = FLAT_RING1_DS + * CS:EIP = FLAT_RING1_CS:start_pc + * SS:ESP = FLAT_RING1_DS:start_stack * ESI = start_info * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ - p->thread.fs = p->thread.gs = __GUEST_DS; - regs->xds = regs->xes = regs->xss = __GUEST_DS; - regs->xcs = __GUEST_CS; + p->thread.fs = p->thread.gs = FLAT_RING1_DS; + regs->xds = regs->xes = regs->xss = FLAT_RING1_DS; + regs->xcs = FLAT_RING1_CS; regs->eip = start_pc; regs->esp = start_stack; regs->esi = start_info; @@ -395,7 +395,7 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Switch GDT and LDT. */ __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt)); - __load_LDT(next_p->mm.ldt_sel); +// __load_LDT(0); /* * Restore %fs and %gs. diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c index 5fe0858ba3..f0b15e081f 100644 --- a/xen/arch/i386/traps.c +++ b/xen/arch/i386/traps.c @@ -325,6 +325,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) return; gp_in_kernel: + if ( (fixup = search_exception_table(regs->eip)) != 0 ) { regs->eip = fixup; @@ -568,23 +569,38 @@ long do_set_trap_table(trap_info_t *traps) trap_info_t cur; trap_info_t *dst = current->thread.traps; - /* - * I'm removing the next line, since it seems more intuitive to use this - * as an interface to incrementally update a domain's trap table. Clearing - * out old entries automatically is rather antisocial! 
- */ - /*memset(dst, 0, sizeof(*dst) * 256);*/ - for ( ; ; ) { if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT; - if ( (cur.cs & 3) == 0 ) return -EPERM; + if ( cur.address == 0 ) break; + + if ( !VALID_CODESEL(cur.cs) ) return -EPERM; + memcpy(dst+cur.vector, &cur, sizeof(cur)); traps++; } - return(0); + return 0; +} + + +long do_set_callbacks(unsigned long event_selector, + unsigned long event_address, + unsigned long failsafe_selector, + unsigned long failsafe_address) +{ + struct task_struct *p = current; + + if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) ) + return -EPERM; + + p->event_selector = event_selector; + p->event_address = event_address; + p->failsafe_selector = failsafe_selector; + p->failsafe_address = failsafe_address; + + return 0; } diff --git a/xen/common/domain.c b/xen/common/domain.c index 2102e29ee3..da62effffd 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -16,8 +16,13 @@ #include <asm/msr.h> #include <xeno/blkdev.h> -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) +/* + * NB. No ring-3 access in initial guestOS pagetables. Note that we allow + * ring-3 privileges in the page directories, so that the guestOS may later + * decide to share a 4MB region with applications. 
+ */ +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; @@ -47,6 +52,9 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu) memset(p->shared_info, 0, PAGE_SIZE); SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id); + p->mm.perdomain_pt = (l1_pgentry_t *)get_free_page(GFP_KERNEL); + memset(p->mm.perdomain_pt, 0, PAGE_SIZE); + init_blkdev_info(p); SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES); @@ -224,7 +232,8 @@ void release_task(struct task_struct *p) { destroy_net_vif(p); } - if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt); + + free_page((unsigned long)p->mm.perdomain_pt); destroy_blkdev_info(p); @@ -268,7 +277,7 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo) net_ring_t *net_ring; net_vif_t *net_vif; - /* entries 0xe0000000 onwards in page table must contain hypervisor + /* High entries in page table must contain hypervisor * mem mappings - set them up. 
*/ phys_l2tab = meminfo->l2_pgt_addr; @@ -279,7 +288,7 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo) (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)); l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(p->mm.perdomain_pt) | PAGE_HYPERVISOR); + mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR); p->mm.pagetable = mk_pagetable(phys_l2tab); unmap_domain_mem(l2tab); diff --git a/xen/include/asm-i386/desc.h b/xen/include/asm-i386/desc.h index 2cb90769b5..f1d11e33f7 100644 --- a/xen/include/asm-i386/desc.h +++ b/xen/include/asm-i386/desc.h @@ -2,7 +2,24 @@ #define __ARCH_DESC_H #define __FIRST_TSS_ENTRY 8 -#define __TSS(n) ((n) + __FIRST_TSS_ENTRY) +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1) + +#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) +#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) + +#define load_TR(n) __asm__ __volatile__ ( "ltr %%ax" : : "a" (__TSS(n)<<3) ) +#define __load_LDT(n) __asm__ __volatile__ ( "lldt %%ax" : : "a" (n) ) + +/* Guest OS must provide its own code selectors, or use the one we provide. */ +#define VALID_CODESEL(_s) \ + ((((_s)>>2) >= FIRST_DOMAIN_GDT_ENTRY) || ((_s) == FLAT_RING1_CS)) + +/* These are bitmasks for the first 32 bits of a descriptor table entry. 
*/ +#define _SEGMENT_TYPE (15<< 8) +#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ +#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ +#define _SEGMENT_P ( 1<<15) /* Segment Present */ +#define _SEGMENT_G ( 1<<23) /* Granularity */ #ifndef __ASSEMBLY__ struct desc_struct { @@ -20,10 +37,6 @@ struct Xgt_desc_struct { #define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) #define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) -#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3)) - -#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" ((n)<<3)) - extern void set_intr_gate(unsigned int irq, void * addr); extern void set_tss_desc(unsigned int n, void *addr); diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h index e5d2e420ac..a46e61f048 100644 --- a/xen/include/asm-i386/processor.h +++ b/xen/include/asm-i386/processor.h @@ -401,17 +401,6 @@ extern struct desc_struct *idt_tables[]; {~0, } /* ioperm */ \ } -#define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ - set_fs(USER_DS); \ - regs->xds = __USER_DS; \ - regs->xes = __USER_DS; \ - regs->xss = __USER_DS; \ - regs->xcs = __USER_CS; \ - regs->eip = new_eip; \ - regs->esp = new_esp; \ -} while (0) - /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h index 509001cf57..540a3b372a 100644 --- a/xen/include/asm-i386/ptrace.h +++ b/xen/include/asm-i386/ptrace.h @@ -79,7 +79,6 @@ enum EFLAGS { #ifdef __KERNEL__ #define user_mode(regs) ((3 & (regs)->xcs)) -#define instruction_pointer(regs) ((regs)->eip) extern void show_regs(struct pt_regs *); #endif diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 797605e9c1..5d23765aca 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ 
b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -10,13 +10,17 @@ /* * SEGMENT DESCRIPTOR TABLES */ -/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */ +/* The first few GDT entries are reserved by Xen. */ #define FIRST_DOMAIN_GDT_ENTRY 40 -/* These are flat segments for domain bootstrap and fallback. */ -#define FLAT_RING1_CS 0x11 -#define FLAT_RING1_DS 0x19 -#define FLAT_RING3_CS 0x23 -#define FLAT_RING3_DS 0x2b +/* + * These flat segments are in the Xen-private section of every GDT. Since + * these are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0x0019 +#define FLAT_RING1_DS 0x0021 +#define FLAT_RING3_CS 0x002b +#define FLAT_RING3_DS 0x0033 /* @@ -29,7 +33,7 @@ #define __HYPERVISOR_console_write 2 #define __HYPERVISOR_set_gdt 3 #define __HYPERVISOR_stack_switch 4 -#define __HYPERVISOR_ldt_switch 5 +#define __HYPERVISOR_set_callbacks 5 #define __HYPERVISOR_net_update 6 #define __HYPERVISOR_fpu_taskswitch 7 #define __HYPERVISOR_yield 8 @@ -97,28 +101,32 @@ /* * PAGE UPDATE COMMANDS AND FLAGS * - * PGREQ_XXX: specified in least-significant bits of 'ptr' field. - * All requests specify relevent PTE or PT address in 'ptr'. + * PGREQ_XXX: specified in least 2 bits of 'ptr' field. These bits are masked + * off to get the real 'ptr' value. + * All requests specify relevent machine address in 'ptr'. * Normal requests specify update value in 'value'. - * Extended requests specify command in least 8 bits of 'value'. + * Extended requests specify command in least 8 bits of 'value'. These bits + * are masked off to get the real 'val' value. Except for PGEXT_SET_LDT + * which shifts the least bits out. */ /* A normal page-table update request. */ -#define PGREQ_NORMAL 0 +#define PGREQ_NORMAL 0 /* does a checked form of '*ptr = val' */ /* Update an entry in the machine->physical mapping table. 
*/ -#define PGREQ_MPT_UPDATE 1 +#define PGREQ_MPT_UPDATE 1 /* ptr = frame to modify table entry for */ /* An extended command. */ -#define PGREQ_EXTENDED_COMMAND 2 +#define PGREQ_EXTENDED_COMMAND 2 /* least 8 bits of val demux further */ /* DOM0 can make entirely unchecked updates which do not affect refcnts. */ -#define PGREQ_UNCHECKED_UPDATE 3 -/* Announce a new top-level page table. */ -#define PGEXT_PIN_L1_TABLE 0 -#define PGEXT_PIN_L2_TABLE 1 -#define PGEXT_PIN_L3_TABLE 2 -#define PGEXT_PIN_L4_TABLE 3 -#define PGEXT_UNPIN_TABLE 4 -#define PGEXT_NEW_BASEPTR 5 -#define PGEXT_TLB_FLUSH 6 -#define PGEXT_INVLPG 7 +#define PGREQ_UNCHECKED_UPDATE 3 /* does an unchecked '*ptr = val' */ +/* Extended commands: */ +#define PGEXT_PIN_L1_TABLE 0 /* ptr = frame to pin */ +#define PGEXT_PIN_L2_TABLE 1 /* ptr = frame to pin */ +#define PGEXT_PIN_L3_TABLE 2 /* ptr = frame to pin */ +#define PGEXT_PIN_L4_TABLE 3 /* ptr = frame to pin */ +#define PGEXT_UNPIN_TABLE 4 /* ptr = frame to unpin */ +#define PGEXT_NEW_BASEPTR 5 /* ptr = new pagetable base to install */ +#define PGEXT_TLB_FLUSH 6 /* ptr = NULL */ +#define PGEXT_INVLPG 7 /* ptr = NULL ; val = page to invalidate */ +#define PGEXT_SET_LDT 8 /* ptr = linear address; val = # entries */ #define PGEXT_CMD_MASK 255 #define PGEXT_CMD_SHIFT 8 @@ -173,27 +181,6 @@ typedef struct shared_info_st { unsigned long events_enable; /* - * Address for callbacks hypervisor -> guest OS. - * Stack frame looks like that of an interrupt. - * Code segment is the default flat selector. - * This handler will only be called when events_enable is non-zero. - */ - unsigned long event_address; - - /* - * Hypervisor uses this callback when it takes a fault on behalf of - * an application. This can happen when returning from interrupts for - * example: various faults can occur when reloading the segment - * registers, and executing 'iret'. 
- * This callback is provided with an extended stack frame, augmented - * with saved values for segment registers %ds and %es: - * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] - * Code segment is the default flat selector. - * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!! - */ - unsigned long failsafe_address; - - /* * Time: The following abstractions are exposed: System Time, Clock Time, * Domain Virtual Time. Domains can access Cycle counter time directly. * XXX RN: Need something to pass NTP scaling to GuestOS. diff --git a/xen/include/xeno/config.h b/xen/include/xeno/config.h index 4067f52dc7..ec92fa031c 100644 --- a/xen/include/xeno/config.h +++ b/xen/include/xeno/config.h @@ -120,10 +120,8 @@ #define barrier() __asm__ __volatile__("": : :"memory") -#define __HYPERVISOR_CS 0x30 -#define __HYPERVISOR_DS 0x38 -#define __GUEST_CS 0x11 -#define __GUEST_DS 0x19 +#define __HYPERVISOR_CS 0x0008 +#define __HYPERVISOR_DS 0x0010 #define NR_syscalls 256 diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 147f3c40fd..f67e20983f 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -27,10 +27,10 @@ struct mm_struct { * Every domain has a L1 pagetable of its own. Per-domain mappings * are put in this table (eg. the current GDT is mapped here). */ - l2_pgentry_t *perdomain_pt; + l1_pgentry_t *perdomain_pt; pagetable_t pagetable; - /* Current LDT selector. */ - unsigned int ldt_sel; + /* Current LDT descriptor. */ + unsigned long ldt[2]; /* Next entry is passed to LGDT on domain switch. */ char gdt[6]; }; @@ -65,18 +65,30 @@ struct task_struct { /* * DO NOT CHANGE THE ORDER OF THE FOLLOWING. 
- * There offsets are hardcoded in entry.S + * Their offsets are hardcoded in entry.S */ int processor; /* 00: current processor */ int state; /* 04: current run state */ - int hyp_events; /* 08: pending events */ + int hyp_events; /* 08: pending intra-Xen events */ unsigned int domain; /* 12: domain id */ /* An unsafe pointer into a shared data area. */ shared_info_t *shared_info; /* 16: shared data area */ /* + * Return vectors pushed to us by guest OS. + * The stack frame for events is exactly that of an x86 hardware interrupt. + * The stack frame for a failsafe callback is augmented with saved values + * for segment registers %ds and %es: + * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] + */ + unsigned long event_selector; /* 20: entry CS */ + unsigned long event_address; /* 24: entry EIP */ + unsigned long failsafe_selector; /* 28: entry CS */ + unsigned long failsafe_address; /* 32: entry EIP */ + + /* * From here on things can be added and shuffled without special attention */ diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c index 00c68a836f..6ac4ff242e 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c @@ -153,10 +153,9 @@ void __init setup_arch(char **cmdline_p) extern unsigned long cpu0_pte_quicklist[]; extern unsigned long cpu0_pgd_quicklist[]; - HYPERVISOR_shared_info->event_address = - (unsigned long)hypervisor_callback; - HYPERVISOR_shared_info->failsafe_address = - (unsigned long)failsafe_callback; + HYPERVISOR_set_callbacks( + __KERNEL_CS, (unsigned long)hypervisor_callback, + __KERNEL_CS, (unsigned long)failsafe_callback); boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; boot_cpu_data.pte_quick = cpu0_pte_quicklist; diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h index 4b9591102c..35de4c20eb 100644 --- 
a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h @@ -195,13 +195,16 @@ static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) return ret; } -static inline int HYPERVISOR_ldt_switch(unsigned long ldts) +static inline int HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) { int ret; __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_ldt_switch), - "b" (ldts) : "memory" ); + : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), + "b" (event_selector), "c" (event_address), + "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); return ret; } diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h index aad36820b7..d15646fcb5 100644 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h @@ -116,12 +116,6 @@ static inline pmd_t __pmd(unsigned long x) #define __PAGE_OFFSET (0xC0000000) -/* - * This much address space is reserved for vmalloc() and iomap() - * as well as fixmap mappings. 
- */ -#define __VMALLOC_RESERVE (128 << 20) - #ifndef __ASSEMBLY__ /* @@ -162,9 +156,6 @@ static __inline__ int get_order(unsigned long size) #endif /* __ASSEMBLY__ */ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) -#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h index 5623211570..35862eb1f2 100644 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/segment.h @@ -1,10 +1,12 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x11 -#define __KERNEL_DS 0x19 +#include <asm/hypervisor-ifs/hypervisor-if.h> -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define __KERNEL_CS FLAT_RING1_CS +#define __KERNEL_DS FLAT_RING1_DS + +#define __USER_CS FLAT_RING3_CS +#define __USER_DS FLAT_RING3_DS #endif |