diff options
30 files changed, 832 insertions, 557 deletions
@@ -1377,6 +1377,8 @@ 41c0c412lQ0NVVN9PsOSznQ-qhOiPA xen/include/asm-x86/vmx_vmcs.h 418fbcfe_WliJPToeVM-9VStvym-hw xen/include/asm-x86/x86_32/asm_defns.h 3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/x86_32/domain_page.h +429c852fi3pvfa9kIjryYK5AGBmXAg xen/include/asm-x86/x86_32/page-2level.h +429c852fskvSOgcD5EC25_m9um9t4g xen/include/asm-x86/x86_32/page-3level.h 4208e2a3ZNFroNXbX9OYaOB-xtUyDQ xen/include/asm-x86/x86_32/page.h 3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/x86_32/regs.h 3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/x86_32/string.h diff --git a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c index c15c3de31e..3750ea788a 100644 --- a/xen/arch/x86/audit.c +++ b/xen/arch/x86/audit.c @@ -408,9 +408,9 @@ int audit_adjust_pgtables(struct domain *d, int dir, int noisy) for_each_exec_domain(d, ed) { - if ( pagetable_val(ed->arch.guest_table) ) + if ( pagetable_get_phys(ed->arch.guest_table) ) adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1); - if ( pagetable_val(ed->arch.shadow_table) ) + if ( pagetable_get_phys(ed->arch.shadow_table) ) adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0); if ( ed->arch.monitor_shadow_ref ) adjust(&frame_table[ed->arch.monitor_shadow_ref], 0); diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S index 9f7580ab98..75d3b57bf0 100644 --- a/xen/arch/x86/boot/x86_32.S +++ b/xen/arch/x86/boot/x86_32.S @@ -101,6 +101,22 @@ __start: xor %eax,%eax rep stosb +#ifdef CONFIG_X86_PAE + /* Initialize low and high mappings of all memory with 2MB pages */ + mov $idle_pg_table_l2-__PAGE_OFFSET,%edi + mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */ +1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */ + stosl /* low mapping */ + add $4,%edi + add $(1<<L2_PAGETABLE_SHIFT),%eax + cmp $DIRECTMAP_PHYS_END+0xe3,%eax + jne 1b +1: stosl /* low mappings cover as much physmem as possible */ + add $4,%edi + add $(1<<L2_PAGETABLE_SHIFT),%eax + cmp $__HYPERVISOR_VIRT_START+0xe3,%eax + jne 1b +#else /* 
Initialize low and high mappings of all memory with 4MB pages */ mov $idle_pg_table-__PAGE_OFFSET,%edi mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */ @@ -113,6 +129,7 @@ __start: add $(1<<L2_PAGETABLE_SHIFT),%eax cmp $__HYPERVISOR_VIRT_START+0xe3,%eax jne 1b +#endif /* Initialise IDT with simple error defaults. */ lea ignore_int,%edx @@ -204,10 +221,17 @@ ENTRY(gdt_table) .quad 0x0000000000000000 /* unused */ .quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */ .quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */ +#ifdef CONFIG_X86_PAE + .quad 0x00cfba00000067ff + .quad 0x00cfb200000067ff + .quad 0x00cffa00000067ff + .quad 0x00cff200000067ff +#else .quad 0x00cfba000000c3ff /* 0xe019 ring 1 3.95GB code at 0x0 */ .quad 0x00cfb2000000c3ff /* 0xe021 ring 1 3.95GB data at 0x0 */ .quad 0x00cffa000000c3ff /* 0xe02b ring 3 3.95GB code at 0x0 */ .quad 0x00cff2000000c3ff /* 0xe033 ring 3 3.95GB data at 0x0 */ +#endif .quad 0x0000000000000000 /* unused */ .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ @@ -215,10 +239,27 @@ ENTRY(gdt_table) /* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */ /* CPU0 stack is aligned on an even page boundary! 
*/ ENTRY(cpu0_stack) - .org 0x2000 + STACK_SIZE + +#ifdef CONFIG_X86_PAE + ENTRY(idle_pg_table) +ENTRY(idle_pg_table_l3) + .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01 + .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01 + .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01 + .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01 + .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE +ENTRY(idle_pg_table_l2) + .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE + +#else /* CONFIG_X86_PAE */ +ENTRY(idle_pg_table) +ENTRY(idle_pg_table_l2) # Initial page directory is 4kB .org 0x2000 + STACK_SIZE + PAGE_SIZE + +#endif /* CONFIG_X86_PAE */ + ENTRY(stext) ENTRY(_stext) diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c index 4232911978..d8fee9e15d 100644 --- a/xen/arch/x86/dom0_ops.c +++ b/xen/arch/x86/dom0_ops.c @@ -405,7 +405,7 @@ void arch_getdomaininfo_ctxt( c->flags |= VGCF_VMX_GUEST; #endif - c->pt_base = pagetable_val(ed->arch.guest_table); + c->pt_base = pagetable_get_phys(ed->arch.guest_table); c->vm_assist = ed->domain->vm_assist; } diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index f7f7818de0..200db3be4c 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -460,7 +460,7 @@ int arch_set_info_guest( // trust the VMX domain builder. Xen should validate this // page table, and/or build the table itself, or ??? // - if ( !pagetable_val(d->arch.phys_table) ) + if ( !pagetable_get_phys(d->arch.phys_table) ) d->arch.phys_table = ed->arch.guest_table; if ( (error = vmx_final_setup_guest(ed, c)) ) @@ -660,7 +660,7 @@ long do_switch_to_user(void) struct exec_domain *ed = current; if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) || - unlikely(pagetable_val(ed->arch.guest_table_user) == 0) ) + unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) ) return -EFAULT; toggle_guest_mode(ed); @@ -978,7 +978,7 @@ void domain_relinquish_resources(struct domain *d) /* Drop the in-use references to page-table bases. 
*/ for_each_exec_domain ( d, ed ) { - if ( pagetable_val(ed->arch.guest_table) != 0 ) + if ( pagetable_get_phys(ed->arch.guest_table) != 0 ) { if ( shadow_mode_refcounts(d) ) put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]); @@ -988,7 +988,7 @@ void domain_relinquish_resources(struct domain *d) ed->arch.guest_table = mk_pagetable(0); } - if ( pagetable_val(ed->arch.guest_table_user) != 0 ) + if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 ) { if ( shadow_mode_refcounts(d) ) put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]); diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index ebdbb622c8..01e80e89be 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -44,15 +44,15 @@ boolean_param("dom0_translate", opt_dom0_translate); #if defined(__i386__) /* No ring-3 access in initial leaf page tables. */ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT) #elif defined(__x86_64__) /* Allow ring-3 access in long mode as guest cannot use ring 1. */ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) -#endif -/* Don't change these: Linux expects just these bits to be set. */ -/* (And that includes the bogus _PAGE_DIRTY!) 
*/ #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define round_pgdown(_p) ((_p)&PAGE_MASK) @@ -91,7 +91,11 @@ int construct_dom0(struct domain *d, #elif defined(__x86_64__) char *image_start = __va(_image_start); char *initrd_start = __va(_initrd_start); +#endif +#if CONFIG_PAGING_LEVELS >= 4 l4_pgentry_t *l4tab = NULL, *l4start = NULL; +#endif +#if CONFIG_PAGING_LEVELS >= 3 l3_pgentry_t *l3tab = NULL, *l3start = NULL; #endif l2_pgentry_t *l2tab = NULL, *l2start = NULL; @@ -143,7 +147,7 @@ int construct_dom0(struct domain *d, panic("Not enough RAM for DOM0 reservation.\n"); alloc_start = page_to_phys(page); alloc_end = alloc_start + (d->tot_pages << PAGE_SHIFT); - + if ( (rc = parseelfimage(&dsi)) != 0 ) return rc; @@ -172,10 +176,15 @@ int construct_dom0(struct domain *d, v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */ -#if defined(__i386__) +#if defined(__i386__) && !defined(CONFIG_X86_PAE) if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) break; +#elif defined(__i386__) && defined(CONFIG_X86_PAE) + /* 5 pages: 1x 3rd + 4x 2nd level */ + if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> + L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages ) + break; #elif defined(__x86_64__) #define NR(_l,_h,_s) \ (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ @@ -249,6 +258,24 @@ int construct_dom0(struct domain *d, } /* WARNING: The new domain must have its 'processor' field filled in! 
*/ +#if CONFIG_PAGING_LEVELS == 3 + l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; + l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE; + memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE); + for (i = 0; i < 4; i++) { + l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT); + l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] = + l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR); + } + unsigned long v; + for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END; + v += (1 << L2_PAGETABLE_SHIFT)) { + l2tab[v >> L2_PAGETABLE_SHIFT] = + l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START), + __PAGE_HYPERVISOR); + } + ed->arch.guest_table = mk_pagetable((unsigned long)l3start); +#else l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = @@ -256,8 +283,9 @@ int construct_dom0(struct domain *d, l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR); ed->arch.guest_table = mk_pagetable((unsigned long)l2start); +#endif - l2tab += l2_table_offset(dsi.v_start); + l2tab += l2_linear_offset(dsi.v_start); mfn = alloc_start >> PAGE_SHIFT; for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) { @@ -282,8 +310,8 @@ int construct_dom0(struct domain *d, } /* Pages that are part of page tables must be read only. 
*/ - l2tab = l2start + l2_table_offset(vpt_start); - l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab); + l2tab = l2start + l2_linear_offset(vpt_start); + l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*l2tab); l1tab += l1_table_offset(vpt_start); for ( count = 0; count < nr_pt_pages; count++ ) { @@ -294,6 +322,34 @@ int construct_dom0(struct domain *d, if ( !get_page_type(page, PGT_writable_page) ) BUG(); +#if CONFIG_PAGING_LEVELS == 3 + switch (count) { + case 0: + page->u.inuse.type_info &= ~PGT_type_mask; + page->u.inuse.type_info |= PGT_l3_page_table; + get_page(page, d); /* an extra ref because of readable mapping */ + + /* Get another ref to L3 page so that it can be pinned. */ + if ( !get_page_and_type(page, d, PGT_l3_page_table) ) + BUG(); + set_bit(_PGT_pinned, &page->u.inuse.type_info); + break; + case 1 ... 4: + page->u.inuse.type_info &= ~PGT_type_mask; + page->u.inuse.type_info |= PGT_l2_page_table; + page->u.inuse.type_info |= + (count-1) << PGT_va_shift; + get_page(page, d); /* an extra ref because of readable mapping */ + break; + default: + page->u.inuse.type_info &= ~PGT_type_mask; + page->u.inuse.type_info |= PGT_l1_page_table; + page->u.inuse.type_info |= + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift; + get_page(page, d); /* an extra ref because of readable mapping */ + break; + } +#else if ( count == 0 ) { page->u.inuse.type_info &= ~PGT_type_mask; @@ -326,8 +382,9 @@ int construct_dom0(struct domain *d, */ get_page(page, d); /* an extra ref because of readable mapping */ } +#endif if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) ) - l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*++l2tab); + l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*++l2tab); } #elif defined(__x86_64__) @@ -538,10 +595,8 @@ int construct_dom0(struct domain *d, #if defined(__i386__) /* Destroy low mappings - they were only for our convenience. 
*/ - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - if ( l2e_get_flags(l2start[i]) & _PAGE_PSE ) - l2start[i] = l2e_empty(); - zap_low_mappings(); /* Do the same for the idle page tables. */ + zap_low_mappings(l2start); + zap_low_mappings(idle_pg_table_l2); #endif /* DOM0 gets access to everything. */ @@ -558,6 +613,12 @@ int construct_dom0(struct domain *d, : SHM_enable)); if ( opt_dom0_translate ) { +#if defined(__i386__) && defined(CONFIG_X86_PAE) + printk("FIXME: PAE code needed here: %s:%d (%s)\n", + __FILE__, __LINE__, __FUNCTION__); + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); +#else /* Hmm, what does this? Looks like isn't portable across 32/64 bit and pae/non-pae ... -- kraxel */ @@ -573,13 +634,14 @@ int construct_dom0(struct domain *d, // so that we can easily access it. // ASSERT( root_get_value(idle_pg_table[1]) == 0 ); - ASSERT( pagetable_val(d->arch.phys_table) ); + ASSERT( pagetable_get_phys(d->arch.phys_table) ); idle_pg_table[1] = root_create_phys( - pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR); + pagetable_get_phys(d->arch.phys_table), __PAGE_HYPERVISOR); translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT), pagetable_get_pfn(ed->arch.guest_table)); idle_pg_table[1] = root_empty(); local_flush_tlb(); +#endif } update_pagetables(ed); /* XXX SMP */ diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c index 7e811b28e8..8ed04ea072 100644 --- a/xen/arch/x86/idle0_task.c +++ b/xen/arch/x86/idle0_task.c @@ -11,8 +11,7 @@ struct domain idle0_domain = { struct exec_domain idle0_exec_domain = { processor: 0, - domain: &idle0_domain, - arch: IDLE0_ARCH_EXEC_DOMAIN + domain: &idle0_domain }; struct tss_struct init_tss[NR_CPUS]; diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index fd3ac2d886..37298b443e 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -121,7 +121,8 @@ static void free_l2_table(struct pfn_info *page); static void free_l1_table(struct pfn_info *page); -static int 
mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long); +static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long, + unsigned int type); static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); /* Used to defer flushing of memory structures. */ @@ -149,21 +150,22 @@ unsigned long max_page; void __init init_frametable(void) { - unsigned long i, p; + unsigned long i, p, step; frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START; frame_table_size = max_page * sizeof(struct pfn_info); frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK; - for ( i = 0; i < frame_table_size; i += (4UL << 20) ) + step = (1 << L2_PAGETABLE_SHIFT); + for ( i = 0; i < frame_table_size; i += step ) { - p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20); + p = alloc_boot_pages(min(frame_table_size - i, step), step); if ( p == 0 ) panic("Not enough memory for frame table\n"); map_pages_to_xen( FRAMETABLE_VIRT_START + i, p >> PAGE_SHIFT, - 4UL << (20-PAGE_SHIFT), + step >> PAGE_SHIFT, PAGE_HYPERVISOR); } @@ -232,7 +234,7 @@ void arch_init_memory(void) void write_ptbase(struct exec_domain *ed) { - write_cr3(pagetable_val(ed->arch.monitor_table)); + write_cr3(pagetable_get_phys(ed->arch.monitor_table)); } void invalidate_shadow_ldt(struct exec_domain *d) @@ -375,7 +377,6 @@ static int get_page_and_type_from_pagenr(unsigned long page_nr, return 1; } - /* * We allow root tables to map each other (a.k.a. linear page tables). 
It * needs some special care with reference counts and access permissions: @@ -432,7 +433,6 @@ get_linear_pagetable( return 1; } - int get_page_from_l1e( l1_pgentry_t l1e, struct domain *d) @@ -446,8 +446,7 @@ get_page_from_l1e( if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 type settings %lx %lx", l1e_get_value(l1e), - l1e_get_value(l1e) & L1_DISALLOW_MASK); + MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK); return 0; } @@ -482,7 +481,7 @@ get_page_from_l1e( static int get_page_from_l2e( l2_pgentry_t l2e, unsigned long pfn, - struct domain *d, unsigned long va_idx) + struct domain *d, unsigned long vaddr) { int rc; @@ -493,45 +492,58 @@ get_page_from_l2e( if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) { - MEM_LOG("Bad L2 page type settings %lx", - l2e_get_value(l2e) & L2_DISALLOW_MASK); + MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK); return 0; } + vaddr >>= L2_PAGETABLE_SHIFT; + vaddr <<= PGT_va_shift; rc = get_page_and_type_from_pagenr( - l2e_get_pfn(l2e), - PGT_l1_page_table | (va_idx<<PGT_va_shift), d); + l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d); -#if defined(__i386__) - return rc ? 
rc : get_linear_pagetable(l2e, pfn, d); -#elif defined(__x86_64__) - return rc; +#if CONFIG_PAGING_LEVELS == 2 + if (!rc) + rc = get_linear_pagetable(l2e, pfn, d); #endif + return rc; } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 static int get_page_from_l3e( - l3_pgentry_t l3e, unsigned long pfn, struct domain *d) + l3_pgentry_t l3e, unsigned long pfn, + struct domain *d, unsigned long vaddr) { ASSERT( !shadow_mode_refcounts(d) ); + int rc; + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) return 1; if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) { - MEM_LOG("Bad L3 page type settings %lx", - l3e_get_value(l3e) & L3_DISALLOW_MASK); + MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK); return 0; } - return get_page_and_type_from_pagenr( - l3e_get_pfn(l3e), PGT_l2_page_table, d); + vaddr >>= L3_PAGETABLE_SHIFT; + vaddr <<= PGT_va_shift; + rc = get_page_and_type_from_pagenr( + l3e_get_pfn(l3e), + PGT_l2_page_table | vaddr, d); +#if CONFIG_PAGING_LEVELS == 3 + if (!rc) + rc = get_linear_pagetable(l3e, pfn, d); +#endif + return rc; } +#endif /* 3 level */ + +#if CONFIG_PAGING_LEVELS >= 4 static int get_page_from_l4e( @@ -546,8 +558,7 @@ get_page_from_l4e( if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) { - MEM_LOG("Bad L4 page type settings %lx", - l4e_get_value(l4e) & L4_DISALLOW_MASK); + MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK); return 0; } @@ -560,7 +571,7 @@ get_page_from_l4e( return 1; } -#endif /* __x86_64__ */ +#endif /* 4 level */ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) @@ -622,7 +633,7 @@ static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn) { @@ -631,6 +642,9 @@ static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn) put_page_and_type(&frame_table[l3e_get_pfn(l3e)]); } +#endif + +#if CONFIG_PAGING_LEVELS >= 4 static void 
put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn) { @@ -639,7 +653,7 @@ static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn) put_page_and_type(&frame_table[l4e_get_pfn(l4e)]); } -#endif /* __x86_64__ */ +#endif static int alloc_l1_table(struct pfn_info *page) @@ -670,11 +684,61 @@ static int alloc_l1_table(struct pfn_info *page) return 0; } +#ifdef CONFIG_X86_PAE +static inline int fixup_pae_linear_mappings(l3_pgentry_t *pl3e) +{ + l2_pgentry_t *pl2e; + unsigned long vaddr; + int i,idx; + + while ((unsigned long)pl3e & ~PAGE_MASK) + pl3e--; + + if (!(l3e_get_flags(pl3e[3]) & _PAGE_PRESENT)) { + printk("Installing a L3 PAE pt without L2 in slot #3 isn't going to fly ...\n"); + return 0; + } -static int alloc_l2_table(struct pfn_info *page) + pl2e = map_domain_mem(l3e_get_phys(pl3e[3])); + for (i = 0; i < 4; i++) { + vaddr = LINEAR_PT_VIRT_START + (i << L2_PAGETABLE_SHIFT); + idx = (vaddr >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1); + if (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) { + pl2e[idx] = l2e_create_phys(l3e_get_phys(pl3e[i]), + __PAGE_HYPERVISOR); + } else + pl2e[idx] = l2e_empty(); + } + unmap_domain_mem(pl2e); + + return 1; +} + +static inline unsigned long fixup_pae_vaddr(unsigned long l2vaddr, + unsigned long l2type) +{ + unsigned long l3vaddr; + + if ((l2type & PGT_va_mask) == PGT_va_unknown) { + printk("%s: hooking one l2 pt into multiple l3 slots isn't allowed, sorry\n", + __FUNCTION__); + domain_crash(); + } + l3vaddr = ((l2type & PGT_va_mask) >> PGT_va_shift) + << L3_PAGETABLE_SHIFT; + return l3vaddr + l2vaddr; +} + +#else +# define fixup_pae_linear_mappings(unused) (1) +# define fixup_pae_vaddr(vaddr, type) (vaddr) +#endif + +static int alloc_l2_table(struct pfn_info *page, unsigned int type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_pfn(page); + unsigned long vaddr; l2_pgentry_t *pl2e; int i; @@ -682,21 +746,24 @@ static int alloc_l2_table(struct pfn_info *page) if ( (PGT_base_page_table == 
PGT_l2_page_table) && unlikely(shadow_mode_refcounts(d)) ) return 1; - ASSERT( !shadow_mode_refcounts(d) ); + pl2e = map_domain_mem(pfn << PAGE_SHIFT); - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - if ( is_guest_l2_slot(i) && - unlikely(!get_page_from_l2e(pl2e[i], pfn, d, i)) ) + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { + vaddr = i << L2_PAGETABLE_SHIFT; + vaddr = fixup_pae_vaddr(vaddr,type); + if ( is_guest_l2_slot(type, i) && + unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) ) goto fail; + } -#if defined(__i386__) +#if CONFIG_PAGING_LEVELS == 2 /* Xen private mappings. */ - memcpy(&pl2e[ROOT_PAGETABLE_FIRST_XEN_SLOT], - &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], - ROOT_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); + memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT], + &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT], + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_create_pfn(pfn, __PAGE_HYPERVISOR); pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] = @@ -704,13 +771,31 @@ static int alloc_l2_table(struct pfn_info *page) virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt), __PAGE_HYPERVISOR); #endif +#if CONFIG_PAGING_LEVELS == 3 + if (3 == ((type & PGT_va_mask) >> PGT_va_shift)) { + unsigned long v,src,dst; + void *virt; + /* Xen private mappings. 
*/ + dst = L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1); + src = L2_PAGETABLE_FIRST_XEN_SLOT; + memcpy(&pl2e[dst], &idle_pg_table_l2[src], + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); + for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END; + v += (1 << L2_PAGETABLE_SHIFT)) { + dst = (v >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1); + virt = page_get_owner(page)->arch.mm_perdomain_pt + (v-PERDOMAIN_VIRT_START); + pl2e[dst] = l2e_create_page(virt_to_page(virt), __PAGE_HYPERVISOR); + } + /* see fixup_pae_linear_mappings() for linear pagetables */ + } +#endif unmap_domain_mem(pl2e); return 1; fail: while ( i-- > 0 ) - if ( is_guest_l2_slot(i) ) + if ( is_guest_l2_slot(type, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_mem(pl2e); @@ -718,22 +803,29 @@ static int alloc_l2_table(struct pfn_info *page) } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 static int alloc_l3_table(struct pfn_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_pfn(page); - l3_pgentry_t *pl3e = page_to_virt(page); + unsigned long vaddr; + l3_pgentry_t *pl3e; int i; ASSERT( !shadow_mode_refcounts(d) ); - for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) + pl3e = map_domain_mem(pfn << PAGE_SHIFT); + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { + vaddr = i << L3_PAGETABLE_SHIFT; if ( is_guest_l3_slot(i) && - unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) + unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) ) goto fail; + } + if (!fixup_pae_linear_mappings(pl3e)) + goto fail; + unmap_domain_mem(pl3e); return 1; fail: @@ -741,9 +833,13 @@ static int alloc_l3_table(struct pfn_info *page) if ( is_guest_l3_slot(i) ) put_page_from_l3e(pl3e[i], pfn); + unmap_domain_mem(pl3e); return 0; } +#endif + +#if CONFIG_PAGING_LEVELS >= 4 static int alloc_l4_table(struct pfn_info *page) { @@ -813,27 +909,35 @@ static void free_l2_table(struct pfn_info *page) pl2e = map_domain_mem(pfn << PAGE_SHIFT); - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - if ( 
is_guest_l2_slot(i) ) + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { + if ( is_guest_l2_slot(page->u.inuse.type_info, i) ) put_page_from_l2e(pl2e[i], pfn); + } unmap_domain_mem(pl2e); } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 static void free_l3_table(struct pfn_info *page) { unsigned long pfn = page_to_pfn(page); - l3_pgentry_t *pl3e = page_to_virt(page); + l3_pgentry_t *pl3e; int i; + pl3e = map_domain_mem(pfn << PAGE_SHIFT); + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l3_slot(i) ) put_page_from_l3e(pl3e[i], pfn); + + unmap_domain_mem(pl3e); } +#endif + +#if CONFIG_PAGING_LEVELS >= 4 static void free_l4_table(struct pfn_info *page) { @@ -846,25 +950,24 @@ static void free_l4_table(struct pfn_info *page) put_page_from_l4e(pl4e[i], pfn); } -#endif /* __x86_64__ */ - +#endif static inline int update_l1e(l1_pgentry_t *pl1e, l1_pgentry_t ol1e, l1_pgentry_t nl1e) { - /* FIXME: breaks with PAE */ - unsigned long o = l1e_get_value(ol1e); - unsigned long n = l1e_get_value(nl1e); + intpte_t o = l1e_get_value(ol1e); + intpte_t n = l1e_get_value(nl1e); if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) || unlikely(o != l1e_get_value(ol1e)) ) { - MEM_LOG("Failed to update %lx -> %lx: saw %lx", - l1e_get_value(ol1e), l1e_get_value(nl1e), o); + MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "\n", + l1e_get_value(ol1e), + l1e_get_value(nl1e), + o); return 0; } - return 1; } @@ -885,8 +988,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e) { if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 type settings %lx", - l1e_get_value(nl1e) & L1_DISALLOW_MASK); + MEM_LOG("Bad L1 type settings %" PRIpte "\n", + (l1e_get_value(nl1e) & L1_DISALLOW_MASK)); return 0; } @@ -913,25 +1016,27 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e) return 1; } - #define UPDATE_ENTRY(_t,_p,_o,_n) ({ \ - unsigned long __o = cmpxchg((unsigned long *)(_p), \ - _t ## e_get_value(_o), \ - _t ## 
e_get_value(_n)); \ + intpte_t __o = cmpxchg((intpte_t *)(_p), \ + _t ## e_get_value(_o), \ + _t ## e_get_value(_n)); \ if ( __o != _t ## e_get_value(_o) ) \ - MEM_LOG("Failed to update %lx -> %lx: saw %lx", \ - _t ## e_get_value(_o), _t ## e_get_value(_n), __o); \ + MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "", \ + (_t ## e_get_value(_o)), \ + (_t ## e_get_value(_n)), \ + (__o)); \ (__o == _t ## e_get_value(_o)); }) - /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, l2_pgentry_t nl2e, - unsigned long pfn) + unsigned long pfn, + unsigned int type) { l2_pgentry_t ol2e; + unsigned long vaddr; - if ( unlikely(!is_guest_l2_slot(pgentry_ptr_to_slot(pl2e))) ) + if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) ) { MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); return 0; @@ -944,8 +1049,8 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, { if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) { - MEM_LOG("Bad L2 type settings %lx", - l2e_get_value(nl2e) & L2_DISALLOW_MASK); + MEM_LOG("Bad L2 type settings %" PRIpte "\n", + (l2e_get_value(nl2e) & L2_DISALLOW_MASK)); return 0; } @@ -953,9 +1058,10 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, if ( !l2e_has_changed(&ol2e, &nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e); - if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, - ((unsigned long)pl2e & - ~PAGE_MASK) >> 2)) ) + vaddr = (((unsigned long)pl2e & ~PAGE_MASK) / sizeof(l2_pgentry_t)) + << L2_PAGETABLE_SHIFT; + vaddr = fixup_pae_vaddr(vaddr,type); + if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) ) @@ -975,7 +1081,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. 
*/ static int mod_l3_entry(l3_pgentry_t *pl3e, @@ -983,6 +1089,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, unsigned long pfn) { l3_pgentry_t ol3e; + unsigned long vaddr; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) { @@ -997,8 +1104,8 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, { if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) { - MEM_LOG("Bad L3 type settings %lx", - l3e_get_value(nl3e) & L3_DISALLOW_MASK); + MEM_LOG("Bad L3 type settings %" PRIpte "", + (u64)(l3e_get_value(nl3e) & L3_DISALLOW_MASK)); return 0; } @@ -1006,26 +1113,33 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, if (!l3e_has_changed(&ol3e, &nl3e, _PAGE_PRESENT)) return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e); - if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) + vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t)) + << L3_PAGETABLE_SHIFT; + if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) ) + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) || + !fixup_pae_linear_mappings(pl3e)) ) { put_page_from_l3e(nl3e, pfn); return 0; } - + put_page_from_l3e(ol3e, pfn); return 1; } - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) ) + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) || + !fixup_pae_linear_mappings(pl3e)) ) return 0; put_page_from_l3e(ol3e, pfn); return 1; } +#endif + +#if CONFIG_PAGING_LEVELS >= 4 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. 
*/ static int mod_l4_entry(l4_pgentry_t *pl4e, @@ -1076,20 +1190,21 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, return 1; } -#endif /* __x86_64__ */ - +#endif int alloc_page_type(struct pfn_info *page, unsigned int type) { - switch ( type ) + switch ( type & PGT_type_mask ) { case PGT_l1_page_table: return alloc_l1_table(page); case PGT_l2_page_table: - return alloc_l2_table(page); -#ifdef __x86_64__ + return alloc_l2_table(page, type); +#if CONFIG_PAGING_LEVELS >= 3 case PGT_l3_page_table: return alloc_l3_table(page); +#endif +#if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: return alloc_l4_table(page); #endif @@ -1124,7 +1239,7 @@ void free_page_type(struct pfn_info *page, unsigned int type) } } - switch ( type ) + switch (type & PGT_type_mask) { case PGT_l1_page_table: free_l1_table(page); @@ -1134,17 +1249,21 @@ void free_page_type(struct pfn_info *page, unsigned int type) free_l2_table(page); break; -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 case PGT_l3_page_table: free_l3_table(page); break; +#endif +#if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: free_l4_table(page); break; #endif default: + printk("%s: type %x pfn %lx\n",__FUNCTION__, + type, page_to_pfn(page)); BUG(); } } @@ -1187,7 +1306,7 @@ void put_page_type(struct pfn_info *page) x & ~PGT_validated)) != x) ) goto again; /* We cleared the 'valid bit' so we do the clean up. */ - free_page_type(page, x & PGT_type_mask); + free_page_type(page, x); /* Carry on, but with the 'valid bit' now clear. */ x &= ~PGT_validated; nx &= ~PGT_validated; @@ -1270,6 +1389,10 @@ int get_page_type(struct pfn_info *page, u32 type) /* This table is may be mapped at multiple locations. 
*/ nx &= ~PGT_va_mask; nx |= PGT_va_unknown; +#if 0 /* debug */ + printk("%s: pfn %lx type %x -> %x (tag as unknown)\n", + __FUNCTION__,page_to_pfn(page),x,nx); +#endif } } if ( unlikely(!(x & PGT_validated)) ) @@ -1286,7 +1409,7 @@ int get_page_type(struct pfn_info *page, u32 type) if ( unlikely(!(nx & PGT_validated)) ) { /* Try to validate page type; drop the new reference on failure. */ - if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) ) + if ( unlikely(!alloc_page_type(page, type)) ) { MEM_LOG("Error while validating pfn %lx for type %08x." " caf=%08x taf=%08x", @@ -1537,15 +1660,17 @@ int do_mmuext_op( type = PGT_l2_page_table; goto pin_page; -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 case MMUEXT_PIN_L3_TABLE: type = PGT_l3_page_table; goto pin_page; +#endif +#if CONFIG_PAGING_LEVELS >= 4 case MMUEXT_PIN_L4_TABLE: type = PGT_l4_page_table; goto pin_page; -#endif /* __x86_64__ */ +#endif case MMUEXT_UNPIN_TABLE: if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) ) @@ -1912,19 +2037,20 @@ int do_mmu_update( break; case PGT_l2_page_table: ASSERT( !shadow_mode_refcounts(d) ); - if ( likely(get_page_type(page, PGT_l2_page_table)) ) + if ( likely(get_page_type( + page, type_info & (PGT_type_mask|PGT_va_mask))) ) { l2_pgentry_t l2e; /* FIXME: doesn't work with PAE */ l2e = l2e_create_phys(req.val, req.val); - okay = mod_l2_entry(va, l2e, mfn); + okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info); if ( okay && unlikely(shadow_mode_enabled(d)) ) shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache); put_page_type(page); } break; -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 case PGT_l3_page_table: ASSERT( !shadow_mode_refcounts(d) ); if ( likely(get_page_type(page, PGT_l3_page_table)) ) @@ -1939,6 +2065,8 @@ int do_mmu_update( put_page_type(page); } break; +#endif +#if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: ASSERT( !shadow_mode_refcounts(d) ); if ( likely(get_page_type(page, PGT_l4_page_table)) ) @@ -1952,7 
+2080,7 @@ int do_mmu_update( put_page_type(page); } break; -#endif /* __x86_64__ */ +#endif default: if ( likely(get_page_type(page, PGT_writable_page)) ) { @@ -2119,9 +2247,10 @@ int update_grant_va_mapping(unsigned long va, int do_update_va_mapping(unsigned long va, - l1_pgentry_t val, + unsigned long val32, unsigned long flags) { + l1_pgentry_t val = l1e_create_phys(val32,val32); struct exec_domain *ed = current; struct domain *d = ed->domain; unsigned int cpu = ed->processor; @@ -2216,7 +2345,7 @@ int do_update_va_mapping(unsigned long va, } int do_update_va_mapping_otherdomain(unsigned long va, - l1_pgentry_t val, + unsigned long val32, unsigned long flags, domid_t domid) { @@ -2234,7 +2363,7 @@ int do_update_va_mapping_otherdomain(unsigned long va, return -ESRCH; } - rc = do_update_va_mapping(va, val, flags); + rc = do_update_va_mapping(va, val32, flags); return rc; } @@ -2551,8 +2680,8 @@ void ptwr_flush(struct domain *d, const int which) static int ptwr_emulated_update( unsigned long addr, - unsigned long old, - unsigned long val, + physaddr_t old, + physaddr_t val, unsigned int bytes, unsigned int do_cmpxchg) { @@ -2570,21 +2699,22 @@ static int ptwr_emulated_update( } /* Turn a sub-word access into a full-word access. */ - /* FIXME: needs tweaks for PAE */ - if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 ) + if (bytes != sizeof(physaddr_t)) { int rc; - unsigned long full; - unsigned int mask = addr & ((BITS_PER_LONG/8)-1); + physaddr_t full; + unsigned int offset = addr & (sizeof(physaddr_t)-1); + /* Align address; read full word. */ - addr &= ~((BITS_PER_LONG/8)-1); - if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) ) - return rc; + addr &= ~(sizeof(physaddr_t)-1); + if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full, + sizeof(physaddr_t))) ) + return rc; /* Mask out bits provided by caller. 
*/ - full &= ~((1UL << (bytes*8)) - 1UL) << (mask*8); + full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8)); /* Shift the caller value and OR in the missing bits. */ - val &= (1UL << (bytes*8)) - 1UL; - val <<= mask*8; + val &= (((physaddr_t)1 << (bytes*8)) - 1); + val <<= (offset)*8; val |= full; } diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index bd21c5bb18..ecfc988f05 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -87,7 +87,7 @@ extern unsigned long cpu0_stack[]; struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; -#if defined(CONFIG_X86_64) +#if CONFIG_PAGING_LEVELS > 2 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; #else unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE; diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c index 83d7fc11b2..f14c6efbfb 100644 --- a/xen/arch/x86/shadow.c +++ b/xen/arch/x86/shadow.c @@ -358,13 +358,13 @@ free_shadow_hl2_table(struct domain *d, unsigned long smfn) } static void inline -free_shadow_l2_table(struct domain *d, unsigned long smfn) +free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type) { l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT); int i, external = shadow_mode_external(d); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - if ( external || is_guest_l2_slot(i) ) + if ( external || is_guest_l2_slot(type, i) ) if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT ) put_shadow_ref(l2e_get_pfn(pl2e[i])); @@ -404,7 +404,7 @@ void free_shadow_page(unsigned long smfn) case PGT_l2_shadow: perfc_decr(shadow_l2_pages); shadow_demote(d, gpfn, gmfn); - free_shadow_l2_table(d, smfn); + free_shadow_l2_table(d, smfn, page->u.inuse.type_info); break; case PGT_hl2_shadow: @@ -573,7 +573,7 @@ static void free_shadow_pages(struct domain *d) // for_each_exec_domain(d, ed) { - if ( pagetable_val(ed->arch.shadow_table) ) + if ( pagetable_get_phys(ed->arch.shadow_table) ) { 
put_shadow_ref(pagetable_get_pfn(ed->arch.shadow_table)); ed->arch.shadow_table = mk_pagetable(0); @@ -684,7 +684,7 @@ static void alloc_monitor_pagetable(struct exec_domain *ed) struct pfn_info *mmfn_info; struct domain *d = ed->domain; - ASSERT(pagetable_val(ed->arch.monitor_table) == 0); + ASSERT(pagetable_get_phys(ed->arch.monitor_table) == 0); mmfn_info = alloc_domheap_page(NULL); ASSERT(mmfn_info != NULL); @@ -705,7 +705,7 @@ static void alloc_monitor_pagetable(struct exec_domain *ed) // map the phys_to_machine map into the Read-Only MPT space for this domain mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = - l2e_create_phys(pagetable_val(d->arch.phys_table), + l2e_create_phys(pagetable_get_phys(d->arch.phys_table), __PAGE_HYPERVISOR); // Don't (yet) have mappings for these... @@ -726,7 +726,7 @@ void free_monitor_pagetable(struct exec_domain *ed) l2_pgentry_t *mpl2e, hl2e, sl2e; unsigned long mfn; - ASSERT( pagetable_val(ed->arch.monitor_table) ); + ASSERT( pagetable_get_phys(ed->arch.monitor_table) ); mpl2e = ed->arch.monitor_vtable; @@ -766,7 +766,7 @@ set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn, struct map_dom_mem_cache *l2cache, struct map_dom_mem_cache *l1cache) { - unsigned long phystab = pagetable_val(d->arch.phys_table); + unsigned long phystab = pagetable_get_phys(d->arch.phys_table); l2_pgentry_t *l2, l2e; l1_pgentry_t *l1; struct pfn_info *l1page; @@ -965,7 +965,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode) { if ( !(new_modes & SHM_external) ) { - ASSERT( !pagetable_val(d->arch.phys_table) ); + ASSERT( !pagetable_get_phys(d->arch.phys_table) ); if ( !alloc_p2m_table(d) ) { printk("alloc_p2m_table failed (out-of-memory?)\n"); @@ -1051,7 +1051,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode) d->arch.shadow_dirty_bitmap = NULL; } if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) && - pagetable_val(d->arch.phys_table) ) + pagetable_get_phys(d->arch.phys_table) ) { 
free_p2m_table(d); } @@ -1093,7 +1093,8 @@ translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn) // up dom0. // void -translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn) +translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn, + unsigned int type) { int i; l2_pgentry_t *l2; @@ -1103,7 +1104,7 @@ translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn) l2 = map_domain_mem(l2mfn << PAGE_SHIFT); for (i = 0; i < L2_PAGETABLE_ENTRIES; i++) { - if ( is_guest_l2_slot(i) && + if ( is_guest_l2_slot(type, i) && (l2e_get_flags(l2[i]) & _PAGE_PRESENT) ) { unsigned long mfn = l2e_get_pfn(l2[i]); @@ -1403,13 +1404,13 @@ gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) perfc_incrc(gpfn_to_mfn_foreign); unsigned long va = gpfn << PAGE_SHIFT; - unsigned long phystab = pagetable_val(d->arch.phys_table); + unsigned long phystab = pagetable_get_phys(d->arch.phys_table); l2_pgentry_t *l2 = map_domain_mem(phystab); l2_pgentry_t l2e = l2[l2_table_offset(va)]; unmap_domain_mem(l2); if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) { - printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%lx\n", + printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n", d->domain_id, gpfn, l2e_get_value(l2e)); return INVALID_MFN; } @@ -1425,7 +1426,7 @@ gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) { - printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%lx\n", + printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n", d->domain_id, gpfn, l1e_get_value(l1e)); return INVALID_MFN; } @@ -1540,7 +1541,7 @@ static unsigned long shadow_l2_table( unsigned long hl2mfn; spl2e[l2_table_offset(RO_MPT_VIRT_START)] = - l2e_create_phys(pagetable_val(d->arch.phys_table), + l2e_create_phys(pagetable_get_phys(d->arch.phys_table), __PAGE_HYPERVISOR); if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) ) @@ -2391,7 
+2392,10 @@ static int resync_all(struct domain *d, u32 stype) changed = 0; for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { - if ( !is_guest_l2_slot(i) && !external ) +#if CONFIG_X86_PAE + BUG(); /* FIXME: need type_info */ +#endif + if ( !is_guest_l2_slot(0,i) && !external ) continue; l2_pgentry_t new_pde = guest2[i]; @@ -2434,7 +2438,10 @@ static int resync_all(struct domain *d, u32 stype) changed = 0; for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { - if ( !is_guest_l2_slot(i) && !external ) +#if CONFIG_X86_PAE + BUG(); /* FIXME: need type_info */ +#endif + if ( !is_guest_l2_slot(0, i) && !external ) continue; l2_pgentry_t new_pde = guest2[i]; @@ -2647,8 +2654,8 @@ int shadow_fault(unsigned long va, struct cpu_user_regs *regs) &gpte, sizeof(gpte))) ) { printk("%s() failed, crashing domain %d " - "due to a read-only L2 page table (gpde=%lx), va=%lx\n", - __func__, d->domain_id, l2e_get_value(gpde), va); + "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n", + __func__,d->domain_id, l2e_get_value(gpde), va); domain_crash_synchronous(); } @@ -2721,7 +2728,7 @@ void shadow_l2_normal_pt_update( shadow_unlock(d); } -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 void shadow_l3_normal_pt_update( struct domain *d, unsigned long pa, l3_pgentry_t gpde, @@ -2729,7 +2736,9 @@ void shadow_l3_normal_pt_update( { BUG(); // not yet implemented } +#endif +#if CONFIG_PAGING_LEVELS >= 4 void shadow_l4_normal_pt_update( struct domain *d, unsigned long pa, l4_pgentry_t gpde, diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 7907fe269d..989a07a381 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -797,7 +797,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 3: /* Read CR3 */ - *reg = pagetable_val(ed->arch.guest_table); + *reg = pagetable_get_phys(ed->arch.guest_table); break; default: diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c index 895c8c11ce..ef51424275 100644 --- a/xen/arch/x86/vmx.c +++ 
b/xen/arch/x86/vmx.c @@ -567,7 +567,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) if (!vmx_paging_enabled(d)) { VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table"); - __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table)); goto skip_cr3; } @@ -578,7 +578,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) * We simply invalidate the shadow. */ mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT); - if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) { + if (mfn != pagetable_get_pfn(d->arch.guest_table)) { printk("Invalid CR3 value=%lx", c->cr3); domain_crash_synchronous(); return 0; @@ -603,7 +603,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) */ d->arch.arch_vmx.cpu_cr3 = c->cr3; VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", c->cr3); - __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table)); } skip_cr3: @@ -769,7 +769,7 @@ static int vmx_set_cr0(unsigned long value) VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table)); /* * arch->shadow_table should hold the next CR3 for shadow */ @@ -869,7 +869,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) * We simply invalidate the shadow. 
*/ mfn = phys_to_machine_mapping(value >> PAGE_SHIFT); - if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) + if (mfn != pagetable_get_pfn(d->arch.guest_table)) __vmx_bug(regs); shadow_sync_all(d->domain); } else { @@ -896,7 +896,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) d->arch.arch_vmx.cpu_cr3 = value; VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table)); } break; } diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c index dc18839d2d..3c49b7b905 100644 --- a/xen/arch/x86/vmx_io.c +++ b/xen/arch/x86/vmx_io.c @@ -466,12 +466,12 @@ void vmx_do_resume(struct exec_domain *d) { vmx_stts(); if ( vmx_paging_enabled(d) ) - __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table)); else // paging is not enabled in the guest - __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table)); - __vmwrite(HOST_CR3, pagetable_val(d->arch.monitor_table)); + __vmwrite(HOST_CR3, pagetable_get_phys(d->arch.monitor_table)); __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom()); if (event_pending(d)) { diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c index a770855c6a..b21db2a40a 100644 --- a/xen/arch/x86/vmx_vmcs.c +++ b/xen/arch/x86/vmx_vmcs.c @@ -196,8 +196,8 @@ void vmx_do_launch(struct exec_domain *ed) error |= __vmwrite(GUEST_TR_BASE, 0); error |= __vmwrite(GUEST_TR_LIMIT, 0xff); - __vmwrite(GUEST_CR3, pagetable_val(ed->arch.guest_table)); - __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table)); + __vmwrite(GUEST_CR3, pagetable_get_phys(ed->arch.guest_table)); + __vmwrite(HOST_CR3, pagetable_get_phys(ed->arch.monitor_table)); __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom()); ed->arch.schedule_tail = arch_vmx_do_resume; diff --git 
a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c index 3c805016e8..8f3a813f5e 100644 --- a/xen/arch/x86/x86_32/domain_page.c +++ b/xen/arch/x86/x86_32/domain_page.c @@ -72,7 +72,7 @@ void *map_domain_mem(unsigned long pa) shadow_epoch[cpu] = ++epoch; } } - while ( l1e_get_value(cache[idx]) != 0 ); + while ( l1e_get_flags(cache[idx]) & _PAGE_PRESENT ); cache[idx] = l1e_create_phys(pa, __PAGE_HYPERVISOR); diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 902a9c64d4..0bbdabb1d6 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -27,6 +27,8 @@ #include <asm/fixmap.h> #include <asm/domain_page.h> +static unsigned long mpt_size; + struct pfn_info *alloc_xen_pagetable(void) { extern int early_boot; @@ -51,69 +53,102 @@ void free_xen_pagetable(struct pfn_info *pg) l2_pgentry_t *virt_to_xen_l2e(unsigned long v) { - return &idle_pg_table[l2_table_offset(v)]; + return &idle_pg_table_l2[l2_linear_offset(v)]; } void __init paging_init(void) { void *ioremap_pt; - unsigned long v; - struct pfn_info *m2p_pg; + unsigned long v,v2,i; + struct pfn_info *pg; - /* Allocate and map the machine-to-phys table. */ - if ( (m2p_pg = alloc_domheap_pages(NULL, 10)) == NULL ) - panic("Not enough memory to bootstrap Xen.\n"); - idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] = - l2e_create_page(m2p_pg, __PAGE_HYPERVISOR | _PAGE_PSE); - memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20); +#ifdef CONFIG_X86_PAE + printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES); +#else + printk("PAE disabled.\n"); +#endif + + idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + + /* Allocate and map the machine-to-phys table and create read-only + * mapping of MPT for guest-OS use. Without PAE we'll end up with + * one 4MB page, with PAE we'll allocate 2MB pages depending on + * the amount of memory installed, but at least 4MB to cover 4GB + * address space.
This is needed to make PCI I/O memory address + * lookups work in guests. -- kraxel */ + mpt_size = max_page * 4; + if (mpt_size < 4*1024*1024) + mpt_size = 4*1024*1024; + for (v = RDWR_MPT_VIRT_START, v2 = RO_MPT_VIRT_START; + v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START); + v += (1 << L2_PAGETABLE_SHIFT), v2 += (1 << L2_PAGETABLE_SHIFT)) { + if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL ) + panic("Not enough memory to bootstrap Xen.\n"); + idle_pg_table_l2[l2_linear_offset(v)] = + l2e_create_page(pg, __PAGE_HYPERVISOR | _PAGE_PSE); + idle_pg_table_l2[l2_linear_offset(v2)] = + l2e_create_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW); + } + memset((void *)RDWR_MPT_VIRT_START, 0x55, mpt_size); - /* Xen 4MB mappings can all be GLOBAL. */ + /* Xen 2/4MB mappings can all be GLOBAL. */ if ( cpu_has_pge ) { - for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) - { - if (l2e_get_flags(idle_pg_table[l2_table_offset(v)]) & _PAGE_PSE) - l2e_add_flags(&idle_pg_table[l2_table_offset(v)], - _PAGE_GLOBAL); + for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) { + if (!l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) & _PAGE_PSE) + continue; + if (v >= RO_MPT_VIRT_START && v < RO_MPT_VIRT_END) + continue; + l2e_add_flags(&idle_pg_table_l2[l2_linear_offset(v)], + _PAGE_GLOBAL); } } - /* Create page table for ioremap(). */ - ioremap_pt = (void *)alloc_xenheap_page(); - clear_page(ioremap_pt); - idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] = - l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR); - - /* - * Create read-only mapping of MPT for guest-OS use. - * NB. Remove the global bit so that shadow_mode_translate()==true domains - * can reused this address space for their phys-to-machine mapping. - */ - idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] = - l2e_create_page(m2p_pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW); + /* Create page table(s) for ioremap(). 
*/ + for (v = IOREMAP_VIRT_START; v != IOREMAP_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) { + ioremap_pt = (void *)alloc_xenheap_page(); + clear_page(ioremap_pt); + idle_pg_table_l2[l2_linear_offset(v)] = + l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR); + } /* Set up mapping cache for domain pages. */ - mapcache = (l1_pgentry_t *)alloc_xenheap_page(); - clear_page(mapcache); - idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] = - l2e_create_page(virt_to_page(mapcache), __PAGE_HYPERVISOR); - - /* Set up linear page table mapping. */ - idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] = - l2e_create_page(virt_to_page(idle_pg_table), __PAGE_HYPERVISOR); + mapcache = (l1_pgentry_t*)alloc_xenheap_pages(10-PAGETABLE_ORDER); + for (v = MAPCACHE_VIRT_START, i = 0; + v != MAPCACHE_VIRT_END; + v += (1 << L2_PAGETABLE_SHIFT), i++) { + clear_page(mapcache + i*L1_PAGETABLE_ENTRIES); + idle_pg_table_l2[l2_linear_offset(v)] = + l2e_create_page(virt_to_page(mapcache + i*L1_PAGETABLE_ENTRIES), + __PAGE_HYPERVISOR); + } + + for (v = LINEAR_PT_VIRT_START; v != LINEAR_PT_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) { + idle_pg_table_l2[l2_linear_offset(v)] = + l2e_create_page(virt_to_page(idle_pg_table_l2 + ((v-RDWR_MPT_VIRT_START) >> PAGETABLE_ORDER)), + __PAGE_HYPERVISOR); + } } -void __init zap_low_mappings(void) +void __init zap_low_mappings(l2_pgentry_t *base) { int i; - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - idle_pg_table[i] = l2e_empty(); + u32 addr; + + for (i = 0; ; i++) { + addr = (i << L2_PAGETABLE_SHIFT); + if (addr >= HYPERVISOR_VIRT_START) + break; + if (l2e_get_phys(base[i]) != addr) + continue; + base[i] = l2e_empty(); + } flush_tlb_all_pge(); } void subarch_init_memory(struct domain *dom_xen) { - unsigned long i, m2p_start_mfn; + unsigned long i, v, m2p_start_mfn; /* * We are rather picky about the layout of 'struct pfn_info'. 
The @@ -129,19 +164,24 @@ void subarch_init_memory(struct domain *dom_xen) offsetof(struct pfn_info, count_info), offsetof(struct pfn_info, u.inuse._domain), sizeof(struct pfn_info)); - for ( ; ; ) ; + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); } /* M2P table is mappable read-only by privileged domains. */ - m2p_start_mfn = l2e_get_pfn( - idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]); - for ( i = 0; i < 1024; i++ ) - { - frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; - /* gdt to make sure it's only mapped read-only by non-privileged - domains. */ - frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; - page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen); + for (v = RDWR_MPT_VIRT_START; + v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START); + v += (1 << L2_PAGETABLE_SHIFT)) { + m2p_start_mfn = l2e_get_pfn( + idle_pg_table_l2[l2_linear_offset(v)]); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + { + frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; + /* gdt to make sure it's only mapped read-only by non-privileged + domains. */ + frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; + page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen); + } } } diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index f545efcba4..8bb876f3f3 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -92,21 +92,24 @@ void show_registers(struct cpu_user_regs *regs) void show_page_walk(unsigned long addr) { - unsigned long page; + l2_pgentry_t pmd; + l1_pgentry_t *pte; if ( addr < PAGE_OFFSET ) return; printk("Pagetable walk from %08lx:\n", addr); - page = l2e_get_value(idle_pg_table[l2_table_offset(addr)]); - printk(" L2 = %08lx %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : ""); - if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) ) + pmd = idle_pg_table_l2[l2_linear_offset(addr)]; + printk(" L2 = %08llx %s\n", (u64)l2e_get_value(pmd), + (l2e_get_flags(pmd) & _PAGE_PSE) ? 
"(2/4MB)" : ""); + if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) || + (l2e_get_flags(pmd) & _PAGE_PSE) ) return; - page &= PAGE_MASK; - page = ((unsigned long *) __va(page))[l1_table_offset(addr)]; - printk(" L1 = %08lx\n", page); + pte = __va(l2e_get_phys(pmd)); + pte += l1_table_offset(addr); + printk(" L1 = %08llx\n", (u64)l1e_get_value(*pte)); } #define DOUBLEFAULT_STACK_SIZE 1024 diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 7fcb8cb51c..147d4eed31 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -79,6 +79,8 @@ void __init paging_init(void) l2_pgentry_t *l2_ro_mpt; struct pfn_info *pg; + idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + /* Create user-accessible L2 directory to map the MPT for guests. */ l3_ro_mpt = (l3_pgentry_t *)alloc_xenheap_page(); clear_page(l3_ro_mpt); diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 4f3925e894..48fe47a953 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -7,6 +7,19 @@ #ifndef __X86_CONFIG_H__ #define __X86_CONFIG_H__ +#if defined(__i386__) +// # define CONFIG_X86_PAE 1 /* yes */ + # undef CONFIG_X86_PAE /* no */ +#endif + +#if defined(__x86_64__) +# define CONFIG_PAGING_LEVELS 4 +#elif defined(CONFIG_X86_PAE) +# define CONFIG_PAGING_LEVELS 3 +#else +# define CONFIG_PAGING_LEVELS 2 +#endif + #define CONFIG_X86 1 #define CONFIG_X86_HT 1 #define CONFIG_SHADOW 1 @@ -189,7 +202,7 @@ extern unsigned long _end; /* standard ELF symbol */ * Per-domain mappings ( 4MB) * Shadow linear pagetable ( 4MB) ( 8MB) * Guest linear pagetable ( 4MB) ( 8MB) - * Machine-to-physical translation table [writable] ( 4MB) + * Machine-to-physical translation table [writable] ( 4MB) (16MB) * Frame-info table (24MB) (96MB) * * Start of guest inaccessible area * Machine-to-physical translation table [read-only] ( 4MB) @@ -203,8 +216,8 @@ extern unsigned long _end; /* standard ELF symbol */ #ifdef CONFIG_X86_PAE # define 
LINEARPT_MBYTES 8 -# define MACHPHYS_MBYTES 4 /* KAF: This needs to be bigger */ -# define FRAMETABLE_MBYTES 96 /* 16 GB mem limit (total) */ +# define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */ +# define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6) #else # define LINEARPT_MBYTES 4 # define MACHPHYS_MBYTES 4 @@ -237,21 +250,21 @@ extern unsigned long _end; /* standard ELF symbol */ #define GUEST_SEGMENT_MAX_ADDR RO_MPT_VIRT_END #ifdef CONFIG_X86_PAE -/* Hypervisor owns top 144MB of virtual address space. */ -# define __HYPERVISOR_VIRT_START 0xF7000000 -# define HYPERVISOR_VIRT_START (0xF7000000UL) +/* Hypervisor owns top 168MB of virtual address space. */ +# define __HYPERVISOR_VIRT_START 0xF5800000 +# define HYPERVISOR_VIRT_START (0xF5800000UL) #else /* Hypervisor owns top 64MB of virtual address space. */ # define __HYPERVISOR_VIRT_START 0xFC000000 # define HYPERVISOR_VIRT_START (0xFC000000UL) #endif -#define ROOT_PAGETABLE_FIRST_XEN_SLOT \ +#define L2_PAGETABLE_FIRST_XEN_SLOT \ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) -#define ROOT_PAGETABLE_LAST_XEN_SLOT \ +#define L2_PAGETABLE_LAST_XEN_SLOT \ (~0UL >> L2_PAGETABLE_SHIFT) -#define ROOT_PAGETABLE_XEN_SLOTS \ - (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1) +#define L2_PAGETABLE_XEN_SLOTS \ + (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1) #define PGT_base_page_table PGT_l2_page_table diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 98da7e017a..7c9aa59c62 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -119,12 +119,6 @@ struct arch_exec_domain unsigned long shadow_ldt_mapcnt; } __cacheline_aligned; -#define IDLE0_ARCH_EXEC_DOMAIN \ -{ \ - perdomain_ptes: 0, \ - monitor_table: mk_pagetable(__pa(idle_pg_table)) \ -} - #endif /* __ASM_DOMAIN_H__ */ /* diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index e8a9617980..ac0d3cd40b 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h 
@@ -76,15 +76,15 @@ struct pfn_info /* Owning guest has pinned this page to its current type? */ #define _PGT_pinned 27 #define PGT_pinned (1U<<_PGT_pinned) - /* The 10 most significant bits of virt address if this is a page table. */ -#define PGT_va_shift 17 -#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift) + /* The 11 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 16 +#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift) /* Is the back pointer still mutable (i.e. not fixed yet)? */ -#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift) +#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift) /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ -#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift) - /* 17-bit count of uses of this frame as its current type. */ -#define PGT_count_mask ((1U<<17)-1) +#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift) + /* 16-bit count of uses of this frame as its current type. */ +#define PGT_count_mask ((1U<<16)-1) #define PGT_mfn_mask ((1U<<20)-1) /* mfn mask for shadow types */ diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index 30f1a8fe93..e82303b6e7 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -8,23 +8,132 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) #endif #define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_FLAG_MASK (~0U) + +#ifndef __ASSEMBLY__ +# include <asm/types.h> +#endif #if defined(__i386__) -#include <asm/x86_32/page.h> +# include <asm/x86_32/page.h> #elif defined(__x86_64__) -#include <asm/x86_64/page.h> +# include <asm/x86_64/page.h> #endif +/* Get pte contents as an integer (intpte_t). */ +#define l1e_get_value(x) ((x).l1) +#define l2e_get_value(x) ((x).l2) +#define l3e_get_value(x) ((x).l3) +#define l4e_get_value(x) ((x).l4) + +/* Get pfn mapped by pte (unsigned long). 
*/ +#define l1e_get_pfn(x) \ + ((unsigned long)(((x).l1 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)) +#define l2e_get_pfn(x) \ + ((unsigned long)(((x).l2 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)) +#define l3e_get_pfn(x) \ + ((unsigned long)(((x).l3 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)) +#define l4e_get_pfn(x) \ + ((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)) + +/* Get physical address of page mapped by pte (physaddr_t). */ +#define l1e_get_phys(x) \ + ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) +#define l2e_get_phys(x) \ + ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) +#define l3e_get_phys(x) \ + ((physaddr_t)(((x).l3 & (PADDR_MASK&PAGE_MASK)))) +#define l4e_get_phys(x) \ + ((physaddr_t)(((x).l4 & (PADDR_MASK&PAGE_MASK)))) + +/* Get pte access flags (unsigned int). */ +#define l1e_get_flags(x) (get_pte_flags((x).l1)) +#define l2e_get_flags(x) (get_pte_flags((x).l2)) +#define l3e_get_flags(x) (get_pte_flags((x).l3)) +#define l4e_get_flags(x) (get_pte_flags((x).l4)) + +/* Construct an empty pte. */ +#define l1e_empty() ((l1_pgentry_t) { 0 }) +#define l2e_empty() ((l2_pgentry_t) { 0 }) +#define l3e_empty() ((l3_pgentry_t) { 0 }) +#define l4e_empty() ((l4_pgentry_t) { 0 }) + +/* Construct a pte from a pfn and access flags. */ +#define l1e_create_pfn(pfn, flags) \ + ((l1_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) }) +#define l2e_create_pfn(pfn, flags) \ + ((l2_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) }) +#define l3e_create_pfn(pfn, flags) \ + ((l3_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) }) +#define l4e_create_pfn(pfn, flags) \ + ((l4_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) }) + +/* Construct a pte from a physical address and access flags. 
*/ +#define l1e_create_phys(pa, flags) \ + ((l1_pgentry_t) { (pa) | put_pte_flags(flags) }) +#define l2e_create_phys(pa, flags) \ + ((l2_pgentry_t) { (pa) | put_pte_flags(flags) }) +#define l3e_create_phys(pa, flags) \ + ((l3_pgentry_t) { (pa) | put_pte_flags(flags) }) +#define l4e_create_phys(pa, flags) \ + ((l4_pgentry_t) { (pa) | put_pte_flags(flags) }) + +/* Add extra flags to an existing pte. */ +#define l1e_add_flags(x, flags) ((x)->l1 |= put_pte_flags(flags)) +#define l2e_add_flags(x, flags) ((x)->l2 |= put_pte_flags(flags)) +#define l3e_add_flags(x, flags) ((x)->l3 |= put_pte_flags(flags)) +#define l4e_add_flags(x, flags) ((x)->l4 |= put_pte_flags(flags)) + +/* Remove flags from an existing pte. */ +#define l1e_remove_flags(x, flags) ((x)->l1 &= ~put_pte_flags(flags)) +#define l2e_remove_flags(x, flags) ((x)->l2 &= ~put_pte_flags(flags)) +#define l3e_remove_flags(x, flags) ((x)->l3 &= ~put_pte_flags(flags)) +#define l4e_remove_flags(x, flags) ((x)->l4 &= ~put_pte_flags(flags)) + +/* Check if a pte's page mapping or significant access flags have changed. */ +#define l1e_has_changed(x,y,flags) \ + ( !!(((x)->l1 ^ (y)->l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) +#define l2e_has_changed(x,y,flags) \ + ( !!(((x)->l2 ^ (y)->l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) +#define l3e_has_changed(x,y,flags) \ + ( !!(((x)->l3 ^ (y)->l3) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) +#define l4e_has_changed(x,y,flags) \ + ( !!(((x)->l4 ^ (y)->l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) + +/* Pagetable walking. */ +#define l2e_to_l1e(x) ((l1_pgentry_t *)__va(l2e_get_phys(x))) +#define l3e_to_l2e(x) ((l2_pgentry_t *)__va(l3e_get_phys(x))) +#define l4e_to_l3e(x) ((l3_pgentry_t *)__va(l4e_get_phys(x))) + +/* Given a virtual address, get an entry offset into a page table. 
*/ +#define l1_table_offset(a) \ + (((a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(a) \ + (((a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#define l3_table_offset(a) \ + (((a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#define l4_table_offset(a) \ + (((a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) + /* Convert a pointer to a page-table entry into pagetable slot index. */ #define pgentry_ptr_to_slot(_p) \ (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p))) /* Page-table type. */ #ifndef __ASSEMBLY__ -typedef struct { unsigned long pt_lo; } pagetable_t; -#define pagetable_val(_x) ((_x).pt_lo) -#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT) -#define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) +#if CONFIG_PAGING_LEVELS == 2 +/* x86_32 default */ +typedef struct { u32 pfn; } pagetable_t; +#elif CONFIG_PAGING_LEVELS == 3 +/* x86_32 PAE */ +typedef struct { u32 pfn; } pagetable_t; +#elif CONFIG_PAGING_LEVELS == 4 +/* x86_64 */ +typedef struct { u64 pfn; } pagetable_t; +#endif +#define pagetable_get_phys(_x) ((physaddr_t)(_x).pfn << PAGE_SHIFT) +#define pagetable_get_pfn(_x) ((_x).pfn) +#define mk_pagetable(_phys) ({ pagetable_t __p; __p.pfn = _phys >> PAGE_SHIFT; __p; }) #endif #define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) @@ -49,6 +158,7 @@ typedef struct { unsigned long pt_lo; } pagetable_t; #define l4e_create_page(_x,_y) (l4e_create_pfn(page_to_pfn(_x),(_y))) /* High table entries are reserved by the hypervisor. 
*/ +/* FIXME: this breaks with PAE -- kraxel */ #define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) #define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \ @@ -78,7 +188,14 @@ typedef struct { unsigned long pt_lo; } pagetable_t; (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT])) #ifndef __ASSEMBLY__ +#if CONFIG_PAGING_LEVELS == 3 +extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES]; +extern l3_pgentry_t idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES]; +extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES]; +#else extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES]; +extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES]; +#endif extern void paging_init(void); #endif @@ -96,17 +213,17 @@ extern void paging_init(void); : : "r" (mmu_cr4_features) ); \ } while ( 0 ) -#define _PAGE_PRESENT 0x001UL -#define _PAGE_RW 0x002UL -#define _PAGE_USER 0x004UL -#define _PAGE_PWT 0x008UL -#define _PAGE_PCD 0x010UL -#define _PAGE_ACCESSED 0x020UL -#define _PAGE_DIRTY 0x040UL -#define _PAGE_PAT 0x080UL -#define _PAGE_PSE 0x080UL -#define _PAGE_GLOBAL 0x100UL -#define _PAGE_AVAIL 0xe00UL +#define _PAGE_PRESENT 0x001U +#define _PAGE_RW 0x002U +#define _PAGE_USER 0x004U +#define _PAGE_PWT 0x008U +#define _PAGE_PCD 0x010U +#define _PAGE_ACCESSED 0x020U +#define _PAGE_DIRTY 0x040U +#define _PAGE_PAT 0x080U +#define _PAGE_PSE 0x080U +#define _PAGE_GLOBAL 0x100U +#define _PAGE_AVAIL 0xE00U #define __PAGE_HYPERVISOR \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index 42a9c1e6f1..665e5e9026 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -130,10 +130,12 @@ extern void shadow_l1_normal_pt_update(struct domain *d, extern void shadow_l2_normal_pt_update(struct domain *d, unsigned long pa, l2_pgentry_t l2e, struct map_dom_mem_cache *cache); -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS >= 3 extern void 
shadow_l3_normal_pt_update(struct domain *d, unsigned long pa, l3_pgentry_t l3e, struct map_dom_mem_cache *cache); +#endif +#if CONFIG_PAGING_LEVELS >= 4 extern void shadow_l4_normal_pt_update(struct domain *d, unsigned long pa, l4_pgentry_t l4e, struct map_dom_mem_cache *cache); @@ -1682,7 +1684,7 @@ static inline void update_pagetables(struct exec_domain *ed) // HACK ALERT: there's currently no easy way to figure out if a domU // has set its arch.guest_table to zero, vs not yet initialized it. // - paging_enabled = !!pagetable_val(ed->arch.guest_table); + paging_enabled = !!pagetable_get_phys(ed->arch.guest_table); /* * We don't call __update_pagetables() when vmx guest paging is diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index 336cab889f..08995644e8 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -39,7 +39,12 @@ extern cpumask_t cpu_sibling_map[]; extern void smp_flush_tlb(void); extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); + +#ifdef CONFIG_X86_64 +extern void zap_low_mappings(void); +#else +extern void zap_low_mappings(l2_pgentry_t *base); +#endif #define MAX_APICID 256 extern u8 x86_cpu_to_apicid[]; diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h index 9bb1f6ec85..5dd6c6c1af 100644 --- a/xen/include/asm-x86/types.h +++ b/xen/include/asm-x86/types.h @@ -44,11 +44,17 @@ typedef signed long long s64; typedef unsigned long long u64; #define BITS_PER_LONG 32 typedef unsigned int size_t; +#if defined(CONFIG_X86_PAE) +typedef u64 physaddr_t; +#else +typedef u32 physaddr_t; +#endif #elif defined(__x86_64__) typedef signed long s64; typedef unsigned long u64; #define BITS_PER_LONG 64 typedef unsigned long size_t; +typedef u64 physaddr_t; #endif /* DMA addresses come in generic and 64-bit flavours. 
*/ diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h new file mode 100644 index 0000000000..2c029b2317 --- /dev/null +++ b/xen/include/asm-x86/x86_32/page-2level.h @@ -0,0 +1,49 @@ +#ifndef __X86_32_PAGE_2L_H__ +#define __X86_32_PAGE_2L_H__ + +#define L1_PAGETABLE_SHIFT 12 +#define L2_PAGETABLE_SHIFT 22 +#define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT + +#define PAGETABLE_ORDER 10 +#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) +#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) +#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES + +#define PADDR_BITS 32 +#define PADDR_MASK (~0UL) + +#ifndef __ASSEMBLY__ + +#include <asm/types.h> + +/* read access (should only be used for debug printk's) */ +typedef u32 intpte_t; +#define PRIpte "08x" + +typedef struct { intpte_t l1; } l1_pgentry_t; +typedef struct { intpte_t l2; } l2_pgentry_t; +typedef l2_pgentry_t root_pgentry_t; + +#endif /* !__ASSEMBLY__ */ + +/* root table */ +#define root_get_pfn l2e_get_pfn +#define root_get_flags l2e_get_flags +#define root_get_value l2e_get_value +#define root_empty l2e_empty +#define root_create_phys l2e_create_phys +#define PGT_root_page_table PGT_l2_page_table + +/* misc */ +#define is_guest_l1_slot(_s) (1) +#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT) + +#define get_pte_flags(x) ((int)(x) & 0xFFF) +#define put_pte_flags(x) ((intpte_t)(x)) + +#define L1_DISALLOW_MASK (0xFFFFF180U) /* PAT/GLOBAL */ +#define L2_DISALLOW_MASK (0xFFFFF180U) /* PSE/GLOBAL */ + +#endif /* __X86_32_PAGE_2L_H__ */ diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h new file mode 100644 index 0000000000..d8cf21bfe5 --- /dev/null +++ b/xen/include/asm-x86/x86_32/page-3level.h @@ -0,0 +1,56 @@ +#ifndef __X86_32_PAGE_3L_H__ +#define __X86_32_PAGE_3L_H__ + +#define L1_PAGETABLE_SHIFT 12 +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define 
PAGE_SHIFT L1_PAGETABLE_SHIFT +#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT + +#define PAGETABLE_ORDER 9 +#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) +#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) +#define L3_PAGETABLE_ENTRIES 4 +#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES + +#define PADDR_BITS 52 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#ifndef __ASSEMBLY__ + +#include <asm/types.h> + +/* read access (should only be used for debug printk's) */ +typedef u64 intpte_t; +#define PRIpte "016llx" + +typedef struct { intpte_t l1; } l1_pgentry_t; +typedef struct { intpte_t l2; } l2_pgentry_t; +typedef struct { intpte_t l3; } l3_pgentry_t; +typedef l3_pgentry_t root_pgentry_t; + +#endif /* !__ASSEMBLY__ */ + +/* root table */ +#define root_get_pfn l3e_get_pfn +#define root_get_flags l3e_get_flags +#define root_get_value l3e_get_value +#define root_empty l3e_empty +#define root_init_phys l3e_create_phys +#define PGT_root_page_table PGT_l3_page_table + +/* misc */ +#define is_guest_l1_slot(_s) (1) +#define is_guest_l2_slot(_t,_s) \ + ((3 != (((_t) & PGT_va_mask) >> PGT_va_shift)) || \ + ((_s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)))) +#define is_guest_l3_slot(_s) (1) + +#define get_pte_flags(x) ((int)((x) >> 40) | ((int)(x) & 0xFFF)) +#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF)) + +#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */ +#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */ +#define L3_DISALLOW_MASK (0xFFFFF1E6U) /* must-be-zero */ + +#endif /* __X86_32_PAGE_3L_H__ */ diff --git a/xen/include/asm-x86/x86_32/page.h b/xen/include/asm-x86/x86_32/page.h index b48971921c..988da65811 100644 --- a/xen/include/asm-x86/x86_32/page.h +++ b/xen/include/asm-x86/x86_32/page.h @@ -2,134 +2,23 @@ #ifndef __X86_32_PAGE_H__ #define __X86_32_PAGE_H__ -#define L1_PAGETABLE_SHIFT 12 -#define L2_PAGETABLE_SHIFT 22 -#define PAGE_SHIFT L1_PAGETABLE_SHIFT -#define 
ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT - -#define PAGETABLE_ORDER 10 -#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) -#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) -#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES - #define __PAGE_OFFSET (0xFF000000) -#define PADDR_BITS 32 #define VADDR_BITS 32 -#define PADDR_MASK (~0UL) #define VADDR_MASK (~0UL) -#define _PAGE_NX 0UL -#define PAGE_FLAG_MASK 0xfff +#define _PAGE_NX 0U -#ifndef __ASSEMBLY__ #include <xen/config.h> -#include <asm/types.h> -typedef struct { u32 l1_lo; } l1_pgentry_t; -typedef struct { u32 l2_lo; } l2_pgentry_t; -typedef l2_pgentry_t root_pgentry_t; - -/* read access (deprecated) */ -#define l1e_get_value(_x) ((unsigned long)((_x).l1_lo)) -#define l2e_get_value(_x) ((unsigned long)((_x).l2_lo)) - -/* read access */ -#define l1e_get_pfn(_x) ((unsigned long)((_x).l1_lo >> PAGE_SHIFT)) -#define l1e_get_phys(_x) ((unsigned long)((_x).l1_lo & PAGE_MASK)) -#define l1e_get_flags(_x) ((unsigned long)((_x).l1_lo & PAGE_FLAG_MASK)) - -#define l2e_get_pfn(_x) ((unsigned long)((_x).l2_lo >> PAGE_SHIFT)) -#define l2e_get_phys(_x) ((unsigned long)((_x).l2_lo & PAGE_MASK)) -#define l2e_get_flags(_x) ((unsigned long)((_x).l2_lo & PAGE_FLAG_MASK)) - -/* write access */ -static inline l1_pgentry_t l1e_empty(void) -{ - l1_pgentry_t e = { .l1_lo = 0 }; - return e; -} -static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags) -{ - l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l1_pgentry_t l1e_create_phys(u32 addr, u32 flags) -{ - l1_pgentry_t e = { .l1_lo = (addr & PAGE_MASK) | flags }; - return e; -} -static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags) -{ - e->l1_lo |= flags; -} -static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags) -{ - e->l1_lo &= ~flags; -} - -static inline l2_pgentry_t l2e_empty(void) -{ - l2_pgentry_t e = { .l2_lo = 0 }; - return e; -} -static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags) -{ - l2_pgentry_t e = { 
.l2_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l2_pgentry_t l2e_create_phys(u32 addr, u32 flags) -{ - l2_pgentry_t e = { .l2_lo = (addr & PAGE_MASK) | flags }; - return e; -} -static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags) -{ - e->l2_lo |= flags; -} -static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags) -{ - e->l2_lo &= ~flags; -} - -/* check entries */ -static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags) -{ - return ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0; -} -static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags) -{ - return ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0; -} - -#endif /* !__ASSEMBLY__ */ - -/* Pagetable walking. */ -#define l2e_to_l1e(_x) \ - ((l1_pgentry_t *)__va(l2e_get_phys(_x))) - -/* Given a virtual address, get an entry offset into a page table. */ -#define l1_table_offset(_a) \ - (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) -#define l2_table_offset(_a) \ - ((_a) >> L2_PAGETABLE_SHIFT) +#ifdef CONFIG_X86_PAE +# include <asm/x86_32/page-3level.h> +#else +# include <asm/x86_32/page-2level.h> +#endif /* Given a virtual address, get an entry offset into a linear page table. 
*/ -#define l1_linear_offset(_a) ((_a) >> PAGE_SHIFT) - -#define is_guest_l1_slot(_s) (1) -#define is_guest_l2_slot(_s) ((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) - -#define root_get_pfn l2e_get_pfn -#define root_get_flags l2e_get_flags -#define root_get_value l2e_get_value -#define root_empty l2e_empty -#define root_create_phys l2e_create_phys -#define PGT_root_page_table PGT_l2_page_table - -#define L1_DISALLOW_MASK (3UL << 7) -#define L2_DISALLOW_MASK (7UL << 7) -#define L3_DISALLOW_MASK (7UL << 7) -#define L4_DISALLOW_MASK (7UL << 7) +#define l1_linear_offset(_a) ((_a) >> L1_PAGETABLE_SHIFT) +#define l2_linear_offset(_a) ((_a) >> L2_PAGETABLE_SHIFT) #endif /* __X86_32_PAGE_H__ */ diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h index abe56c9952..75bff5b6e9 100644 --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -24,181 +24,28 @@ #define PADDR_MASK ((1UL << PADDR_BITS)-1) #define VADDR_MASK ((1UL << VADDR_BITS)-1) -#define _PAGE_NX (cpu_has_nx ? 
(1UL<<63) : 0UL) -#define PAGE_FLAG_MASK 0xfff - #ifndef __ASSEMBLY__ + #include <xen/config.h> #include <asm/types.h> -typedef struct { u64 l1_lo; } l1_pgentry_t; -typedef struct { u64 l2_lo; } l2_pgentry_t; -typedef struct { u64 l3_lo; } l3_pgentry_t; -typedef struct { u64 l4_lo; } l4_pgentry_t; -typedef l4_pgentry_t root_pgentry_t; -/* read access (depricated) */ -#define l1e_get_value(_x) ((_x).l1_lo) -#define l2e_get_value(_x) ((_x).l2_lo) -#define l3e_get_value(_x) ((_x).l3_lo) -#define l4e_get_value(_x) ((_x).l4_lo) - -/* read access */ -#define l1e_get_pfn(_x) (((_x).l1_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT) -#define l1e_get_phys(_x) (((_x).l1_lo & (PADDR_MASK&PAGE_MASK))) -#define l1e_get_flags(_x) ((_x).l1_lo & PAGE_FLAG_MASK) - -#define l2e_get_pfn(_x) (((_x).l2_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT) -#define l2e_get_phys(_x) (((_x).l2_lo & (PADDR_MASK&PAGE_MASK))) -#define l2e_get_flags(_x) ((_x).l2_lo & PAGE_FLAG_MASK) - -#define l3e_get_pfn(_x) (((_x).l3_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT) -#define l3e_get_phys(_x) (((_x).l3_lo & (PADDR_MASK&PAGE_MASK))) -#define l3e_get_flags(_x) ((_x).l3_lo & PAGE_FLAG_MASK) - -#define l4e_get_pfn(_x) (((_x).l4_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT) -#define l4e_get_phys(_x) (((_x).l4_lo & (PADDR_MASK&PAGE_MASK))) -#define l4e_get_flags(_x) ((_x).l4_lo & PAGE_FLAG_MASK) - -/* write access */ -static inline l1_pgentry_t l1e_empty(void) -{ - l1_pgentry_t e = { .l1_lo = 0 }; - return e; -} -static inline l1_pgentry_t l1e_create_pfn(u64 pfn, u64 flags) -{ - l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l1_pgentry_t l1e_create_phys(u64 addr, u64 flags) -{ - l1_pgentry_t e = { .l1_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags }; - return e; -} -static inline void l1e_add_flags(l1_pgentry_t *e, u64 flags) -{ - e->l1_lo |= flags; -} -static inline void l1e_remove_flags(l1_pgentry_t *e, u64 flags) -{ - e->l1_lo &= ~flags; -} - -static inline l2_pgentry_t 
l2e_empty(void) -{ - l2_pgentry_t e = { .l2_lo = 0 }; - return e; -} -static inline l2_pgentry_t l2e_create_pfn(u64 pfn, u64 flags) -{ - l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l2_pgentry_t l2e_create_phys(u64 addr, u64 flags) -{ - l2_pgentry_t e = { .l2_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags }; - return e; -} -static inline void l2e_add_flags(l2_pgentry_t *e, u64 flags) -{ - e->l2_lo |= flags; -} -static inline void l2e_remove_flags(l2_pgentry_t *e, u64 flags) -{ - e->l2_lo &= ~flags; -} - -static inline l3_pgentry_t l3e_empty(void) -{ - l3_pgentry_t e = { .l3_lo = 0 }; - return e; -} -static inline l3_pgentry_t l3e_create_pfn(u64 pfn, u64 flags) -{ - l3_pgentry_t e = { .l3_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l3_pgentry_t l3e_create_phys(u64 addr, u64 flags) -{ - l3_pgentry_t e = { .l3_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags }; - return e; -} -static inline void l3e_add_flags(l3_pgentry_t *e, u64 flags) -{ - e->l3_lo |= flags; -} -static inline void l3e_remove_flags(l3_pgentry_t *e, u64 flags) -{ - e->l3_lo &= ~flags; -} - -static inline l4_pgentry_t l4e_empty(void) -{ - l4_pgentry_t e = { .l4_lo = 0 }; - return e; -} -static inline l4_pgentry_t l4e_create_pfn(u64 pfn, u64 flags) -{ - l4_pgentry_t e = { .l4_lo = (pfn << PAGE_SHIFT) | flags }; - return e; -} -static inline l4_pgentry_t l4e_create_phys(u64 addr, u64 flags) -{ - l4_pgentry_t e = { .l4_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags }; - return e; -} -static inline void l4e_add_flags(l4_pgentry_t *e, u64 flags) -{ - e->l4_lo |= flags; -} -static inline void l4e_remove_flags(l4_pgentry_t *e, u64 flags) -{ - e->l4_lo &= ~flags; -} - -/* check entries */ -static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags) -{ - return ((e1->l1_lo ^ e2->l1_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0; -} -static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags) -{ - return ((e1->l2_lo 
^ e2->l2_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0; -} -static inline int l3e_has_changed(l3_pgentry_t *e1, l3_pgentry_t *e2, u32 flags) -{ - return ((e1->l3_lo ^ e2->l3_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0; -} -static inline int l4e_has_changed(l4_pgentry_t *e1, l4_pgentry_t *e2, u32 flags) -{ - return ((e1->l4_lo ^ e2->l4_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0; -} +/* read access (should only be used for debug printk's) */ +typedef u64 intpte_t; +#define PRIpte "016lx" -#endif /* !__ASSEMBLY__ */ +typedef struct { intpte_t l1; } l1_pgentry_t; +typedef struct { intpte_t l2; } l2_pgentry_t; +typedef struct { intpte_t l3; } l3_pgentry_t; +typedef struct { intpte_t l4; } l4_pgentry_t; +typedef l4_pgentry_t root_pgentry_t; -/* Pagetable walking. */ -#define l2e_to_l1e(_x) \ - ((l1_pgentry_t *)__va(l2e_get_phys(_x))) -#define l3e_to_l2e(_x) \ - ((l2_pgentry_t *)__va(l3e_get_phys(_x))) -#define l4e_to_l3e(_x) \ - ((l3_pgentry_t *)__va(l4e_get_phys(_x))) - -/* Given a virtual address, get an entry offset into a page table. */ -#define l1_table_offset(_a) \ - (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) -#define l2_table_offset(_a) \ - (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) -#define l3_table_offset(_a) \ - (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) -#define l4_table_offset(_a) \ - (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif /* !__ASSEMBLY__ */ /* Given a virtual address, get an entry offset into a linear page table. 
*/ #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT) #define is_guest_l1_slot(_s) (1) -#define is_guest_l2_slot(_s) (1) +#define is_guest_l2_slot(_t, _s) (1) #define is_guest_l3_slot(_s) (1) #define is_guest_l4_slot(_s) \ (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \ @@ -211,10 +58,15 @@ static inline int l4e_has_changed(l4_pgentry_t *e1, l4_pgentry_t *e2, u32 flags) #define root_create_phys l4e_create_phys #define PGT_root_page_table PGT_l4_page_table -#define L1_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (3UL << 7)) -#define L2_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7)) -#define L3_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7)) -#define L4_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7)) +#define get_pte_flags(x) ((int)((x) >> 40) | ((int)(x) & 0xFFF)) +#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF)) + +#define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U) + +#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */ +#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */ +#define L3_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */ +#define L4_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */ #endif /* __X86_64_PAGE_H__ */ diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h index 6ebfd11399..84ba88e7f7 100644 --- a/xen/include/public/arch-x86_32.h +++ b/xen/include/public/arch-x86_32.h @@ -64,7 +64,11 @@ * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. */ -#define HYPERVISOR_VIRT_START (0xFC000000UL) +#ifdef CONFIG_X86_PAE +# define HYPERVISOR_VIRT_START (0xF5800000UL) +#else +# define HYPERVISOR_VIRT_START (0xFC000000UL) +#endif #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START) #endif |