-rw-r--r--  .rootkeys                                  |   2
-rw-r--r--  xen/arch/x86/audit.c                       |   4
-rw-r--r--  xen/arch/x86/boot/x86_32.S                 |  43
-rw-r--r--  xen/arch/x86/dom0_ops.c                    |   2
-rw-r--r--  xen/arch/x86/domain.c                      |   8
-rw-r--r--  xen/arch/x86/domain_build.c                |  92
-rw-r--r--  xen/arch/x86/idle0_task.c                  |   3
-rw-r--r--  xen/arch/x86/mm.c                          | 348
-rw-r--r--  xen/arch/x86/setup.c                       |   2
-rw-r--r--  xen/arch/x86/shadow.c                      |  51
-rw-r--r--  xen/arch/x86/traps.c                       |   2
-rw-r--r--  xen/arch/x86/vmx.c                         |  12
-rw-r--r--  xen/arch/x86/vmx_io.c                      |   6
-rw-r--r--  xen/arch/x86/vmx_vmcs.c                    |   4
-rw-r--r--  xen/arch/x86/x86_32/domain_page.c          |   2
-rw-r--r--  xen/arch/x86/x86_32/mm.c                   | 140
-rw-r--r--  xen/arch/x86/x86_32/traps.c                |  17
-rw-r--r--  xen/arch/x86/x86_64/mm.c                   |   2
-rw-r--r--  xen/include/asm-x86/config.h               |  33
-rw-r--r--  xen/include/asm-x86/domain.h               |   6
-rw-r--r--  xen/include/asm-x86/mm.h                   |  14
-rw-r--r--  xen/include/asm-x86/page.h                 | 151
-rw-r--r--  xen/include/asm-x86/shadow.h               |   6
-rw-r--r--  xen/include/asm-x86/smp.h                  |   7
-rw-r--r--  xen/include/asm-x86/types.h                |   6
-rw-r--r--  xen/include/asm-x86/x86_32/page-2level.h   |  49
-rw-r--r--  xen/include/asm-x86/x86_32/page-3level.h   |  56
-rw-r--r--  xen/include/asm-x86/x86_32/page.h          | 127
-rw-r--r--  xen/include/asm-x86/x86_64/page.h          | 188
-rw-r--r--  xen/include/public/arch-x86_32.h           |   6
30 files changed, 832 insertions(+), 557 deletions(-)
diff --git a/.rootkeys b/.rootkeys
index 81d4402321..65875185d4 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -1377,6 +1377,8 @@
41c0c412lQ0NVVN9PsOSznQ-qhOiPA xen/include/asm-x86/vmx_vmcs.h
418fbcfe_WliJPToeVM-9VStvym-hw xen/include/asm-x86/x86_32/asm_defns.h
3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/x86_32/domain_page.h
+429c852fi3pvfa9kIjryYK5AGBmXAg xen/include/asm-x86/x86_32/page-2level.h
+429c852fskvSOgcD5EC25_m9um9t4g xen/include/asm-x86/x86_32/page-3level.h
4208e2a3ZNFroNXbX9OYaOB-xtUyDQ xen/include/asm-x86/x86_32/page.h
3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/x86_32/regs.h
3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/x86_32/string.h
diff --git a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c
index c15c3de31e..3750ea788a 100644
--- a/xen/arch/x86/audit.c
+++ b/xen/arch/x86/audit.c
@@ -408,9 +408,9 @@ int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
for_each_exec_domain(d, ed)
{
- if ( pagetable_val(ed->arch.guest_table) )
+ if ( pagetable_get_phys(ed->arch.guest_table) )
adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
- if ( pagetable_val(ed->arch.shadow_table) )
+ if ( pagetable_get_phys(ed->arch.shadow_table) )
adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
if ( ed->arch.monitor_shadow_ref )
adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
index 9f7580ab98..75d3b57bf0 100644
--- a/xen/arch/x86/boot/x86_32.S
+++ b/xen/arch/x86/boot/x86_32.S
@@ -101,6 +101,22 @@ __start:
xor %eax,%eax
rep stosb
+#ifdef CONFIG_X86_PAE
+ /* Initialize low and high mappings of all memory with 2MB pages */
+ mov $idle_pg_table_l2-__PAGE_OFFSET,%edi
+ mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
+1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $DIRECTMAP_PHYS_END+0xe3,%eax
+ jne 1b
+1: stosl /* low mappings cover as much physmem as possible */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ jne 1b
+#else
/* Initialize low and high mappings of all memory with 4MB pages */
mov $idle_pg_table-__PAGE_OFFSET,%edi
mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
@@ -113,6 +129,7 @@ __start:
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
+#endif
/* Initialise IDT with simple error defaults. */
lea ignore_int,%edx
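[Note: the byte 0xe3 stored into each directory slot above is simply the OR of the standard PDE flag bits named in the comment: PRESENT (0x01), RW (0x02), ACCESSED (0x20), DIRTY (0x40) and PSE (0x80). A minimal standalone C sketch of what one loop iteration computes; the flag values are the conventional x86 ones, and the 2MB step assumes the PAE value of L2_PAGETABLE_SHIFT (21).]

#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT  0x001
#define _PAGE_RW       0x002
#define _PAGE_ACCESSED 0x020
#define _PAGE_DIRTY    0x040
#define _PAGE_PSE      0x080  /* superpage: 2MB with PAE, 4MB without */

int main(void)
{
    uint32_t flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |
                     _PAGE_DIRTY | _PAGE_PSE;
    printf("flags = %#x\n", flags);          /* 0xe3, as in the loop */

    /* Each iteration maps the next superpage: pde = phys | flags. */
    uint32_t phys = 0;
    for (int i = 0; i < 4; i++, phys += 1u << 21 /* 2MB step */)
        printf("slot %d: pde = %#010x\n", i, phys | flags);
    return 0;
}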
@@ -204,10 +221,17 @@ ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+ .quad 0x00cfba00000067ff
+ .quad 0x00cfb200000067ff
+ .quad 0x00cffa00000067ff
+ .quad 0x00cff200000067ff
+#else
.quad 0x00cfba000000c3ff /* 0xe019 ring 1 3.95GB code at 0x0 */
.quad 0x00cfb2000000c3ff /* 0xe021 ring 1 3.95GB data at 0x0 */
.quad 0x00cffa000000c3ff /* 0xe02b ring 3 3.95GB code at 0x0 */
.quad 0x00cff2000000c3ff /* 0xe033 ring 3 3.95GB data at 0x0 */
+#endif
.quad 0x0000000000000000 /* unused */
.fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
@@ -215,10 +239,27 @@ ENTRY(gdt_table)
/* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
/* CPU0 stack is aligned on an even page boundary! */
ENTRY(cpu0_stack)
-
.org 0x2000 + STACK_SIZE
+
+#ifdef CONFIG_X86_PAE
+
ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
+ .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+ENTRY(idle_pg_table_l2)
+ .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
+
+#else /* CONFIG_X86_PAE */
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
.org 0x2000 + STACK_SIZE + PAGE_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
ENTRY(stext)
ENTRY(_stext)
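[Note: the PAE idle-table layout above is all fixed offsets: one L3 page placed right after the CPU0 stack, whose four .quad entries point (present bit 0x01 only; PAE top-level entries carry almost no other flags) at the four consecutive L2 pages that follow. A sketch of the same address arithmetic in C; the 0x100000 load base and 0x2000 stack offset come from the .quad values above, while STACK_SIZE = 0x2000 is an assumption (STACK_ORDER 1, per the comment above cpu0_stack).]

#include <stdint.h>
#include <stdio.h>

#define LOAD_BASE  0x100000u  /* physical load address of the image */
#define STACK_OFF  0x2000u    /* offset of cpu0_stack */
#define STACK_SIZE 0x2000u    /* assumption: STACK_ORDER 1, two pages */
#define PAGE_SIZE  0x1000u

int main(void)
{
    /* idle_pg_table_l3 lives at LOAD_BASE + STACK_OFF + STACK_SIZE;
     * its four entries point at the L2 pages following it. */
    for (unsigned int i = 0; i < 4; i++) {
        uint64_t l3e = LOAD_BASE + STACK_OFF + STACK_SIZE
                     + (i + 1) * PAGE_SIZE + 0x01 /* present */;
        printf("l3e[%u] = %#llx\n", i, (unsigned long long)l3e);
    }
    return 0;
}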
diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
index 4232911978..d8fee9e15d 100644
--- a/xen/arch/x86/dom0_ops.c
+++ b/xen/arch/x86/dom0_ops.c
@@ -405,7 +405,7 @@ void arch_getdomaininfo_ctxt(
c->flags |= VGCF_VMX_GUEST;
#endif
- c->pt_base = pagetable_val(ed->arch.guest_table);
+ c->pt_base = pagetable_get_phys(ed->arch.guest_table);
c->vm_assist = ed->domain->vm_assist;
}
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f7f7818de0..200db3be4c 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -460,7 +460,7 @@ int arch_set_info_guest(
// trust the VMX domain builder. Xen should validate this
// page table, and/or build the table itself, or ???
//
- if ( !pagetable_val(d->arch.phys_table) )
+ if ( !pagetable_get_phys(d->arch.phys_table) )
d->arch.phys_table = ed->arch.guest_table;
if ( (error = vmx_final_setup_guest(ed, c)) )
@@ -660,7 +660,7 @@ long do_switch_to_user(void)
struct exec_domain *ed = current;
if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
- unlikely(pagetable_val(ed->arch.guest_table_user) == 0) )
+ unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) )
return -EFAULT;
toggle_guest_mode(ed);
@@ -978,7 +978,7 @@ void domain_relinquish_resources(struct domain *d)
/* Drop the in-use references to page-table bases. */
for_each_exec_domain ( d, ed )
{
- if ( pagetable_val(ed->arch.guest_table) != 0 )
+ if ( pagetable_get_phys(ed->arch.guest_table) != 0 )
{
if ( shadow_mode_refcounts(d) )
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
@@ -988,7 +988,7 @@ void domain_relinquish_resources(struct domain *d)
ed->arch.guest_table = mk_pagetable(0);
}
- if ( pagetable_val(ed->arch.guest_table_user) != 0 )
+ if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 )
{
if ( shadow_mode_refcounts(d) )
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index ebdbb622c8..01e80e89be 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -44,15 +44,15 @@ boolean_param("dom0_translate", opt_dom0_translate);
#if defined(__i386__)
/* No ring-3 access in initial leaf page tables. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
#elif defined(__x86_64__)
/* Allow ring-3 access in long mode as guest cannot use ring 1. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-/* Don't change these: Linux expects just these bits to be set. */
-/* (And that includes the bogus _PAGE_DIRTY!) */
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
@@ -91,7 +91,11 @@ int construct_dom0(struct domain *d,
#elif defined(__x86_64__)
char *image_start = __va(_image_start);
char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
l3_pgentry_t *l3tab = NULL, *l3start = NULL;
#endif
l2_pgentry_t *l2tab = NULL, *l2start = NULL;
@@ -143,7 +147,7 @@ int construct_dom0(struct domain *d,
panic("Not enough RAM for DOM0 reservation.\n");
alloc_start = page_to_phys(page);
alloc_end = alloc_start + (d->tot_pages << PAGE_SHIFT);
-
+
if ( (rc = parseelfimage(&dsi)) != 0 )
return rc;
@@ -172,10 +176,15 @@ int construct_dom0(struct domain *d,
v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
if ( (v_end - vstack_end) < (512UL << 10) )
v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+ /* 5 pages: 1x 3rd + 4x 2nd level */
+ if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+ break;
#elif defined(__x86_64__)
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
@@ -249,6 +258,24 @@ int construct_dom0(struct domain *d,
}
/* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+ l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+ memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+ for (i = 0; i < 4; i++) {
+ l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+ l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+ l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+ }
+ unsigned long v;
+ for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT)) {
+ l2tab[v >> L2_PAGETABLE_SHIFT] =
+ l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START),
+ __PAGE_HYPERVISOR);
+ }
+ ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
@@ -256,8 +283,9 @@ int construct_dom0(struct domain *d,
l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
- l2tab += l2_table_offset(dsi.v_start);
+ l2tab += l2_linear_offset(dsi.v_start);
mfn = alloc_start >> PAGE_SHIFT;
for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
{
@@ -282,8 +310,8 @@ int construct_dom0(struct domain *d,
}
/* Pages that are part of page tables must be read only. */
- l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
+ l2tab = l2start + l2_linear_offset(vpt_start);
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*l2tab);
l1tab += l1_table_offset(vpt_start);
for ( count = 0; count < nr_pt_pages; count++ )
{
@@ -294,6 +322,34 @@ int construct_dom0(struct domain *d,
if ( !get_page_type(page, PGT_writable_page) )
BUG();
+#if CONFIG_PAGING_LEVELS == 3
+ switch (count) {
+ case 0:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l3_page_table;
+ get_page(page, d); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L3 page so that it can be pinned. */
+ if ( !get_page_and_type(page, d, PGT_l3_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ break;
+ case 1 ... 4:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+ page->u.inuse.type_info |=
+ (count-1) << PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ default:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ }
+#else
if ( count == 0 )
{
page->u.inuse.type_info &= ~PGT_type_mask;
@@ -326,8 +382,9 @@ int construct_dom0(struct domain *d,
*/
get_page(page, d); /* an extra ref because of readable mapping */
}
+#endif
if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*++l2tab);
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*++l2tab);
}
#elif defined(__x86_64__)
@@ -538,10 +595,8 @@ int construct_dom0(struct domain *d,
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2e_get_flags(l2start[i]) & _PAGE_PSE )
- l2start[i] = l2e_empty();
- zap_low_mappings(); /* Do the same for the idle page tables. */
+ zap_low_mappings(l2start);
+ zap_low_mappings(idle_pg_table_l2);
#endif
/* DOM0 gets access to everything. */
@@ -558,6 +613,12 @@ int construct_dom0(struct domain *d,
: SHM_enable));
if ( opt_dom0_translate )
{
+#if defined(__i386__) && defined(CONFIG_X86_PAE)
+ printk("FIXME: PAE code needed here: %s:%d (%s)\n",
+ __FILE__, __LINE__, __FUNCTION__);
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+#else
/* Hmm, what does this?
Looks like isn't portable across 32/64 bit and pae/non-pae ...
-- kraxel */
@@ -573,13 +634,14 @@ int construct_dom0(struct domain *d,
// so that we can easily access it.
//
ASSERT( root_get_value(idle_pg_table[1]) == 0 );
- ASSERT( pagetable_val(d->arch.phys_table) );
+ ASSERT( pagetable_get_phys(d->arch.phys_table) );
idle_pg_table[1] = root_create_phys(
- pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR);
+ pagetable_get_phys(d->arch.phys_table), __PAGE_HYPERVISOR);
translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
pagetable_get_pfn(ed->arch.guest_table));
idle_pg_table[1] = root_empty();
local_flush_tlb();
+#endif
}
update_pagetables(ed); /* XXX SMP */
diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c
index 7e811b28e8..8ed04ea072 100644
--- a/xen/arch/x86/idle0_task.c
+++ b/xen/arch/x86/idle0_task.c
@@ -11,8 +11,7 @@ struct domain idle0_domain = {
struct exec_domain idle0_exec_domain = {
processor: 0,
- domain: &idle0_domain,
- arch: IDLE0_ARCH_EXEC_DOMAIN
+ domain: &idle0_domain
};
struct tss_struct init_tss[NR_CPUS];
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index fd3ac2d886..37298b443e 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -121,7 +121,8 @@
static void free_l2_table(struct pfn_info *page);
static void free_l1_table(struct pfn_info *page);
-static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
+static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
+ unsigned int type);
static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
/* Used to defer flushing of memory structures. */
@@ -149,21 +150,22 @@ unsigned long max_page;
void __init init_frametable(void)
{
- unsigned long i, p;
+ unsigned long i, p, step;
frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START;
frame_table_size = max_page * sizeof(struct pfn_info);
frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
- for ( i = 0; i < frame_table_size; i += (4UL << 20) )
+ step = (1 << L2_PAGETABLE_SHIFT);
+ for ( i = 0; i < frame_table_size; i += step )
{
- p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20);
+ p = alloc_boot_pages(min(frame_table_size - i, step), step);
if ( p == 0 )
panic("Not enough memory for frame table\n");
map_pages_to_xen(
FRAMETABLE_VIRT_START + i,
p >> PAGE_SHIFT,
- 4UL << (20-PAGE_SHIFT),
+ step >> PAGE_SHIFT,
PAGE_HYPERVISOR);
}
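[Note: the hunk above replaces the hard-coded 4MB stride with step = 1 << L2_PAGETABLE_SHIFT, so the same loop maps the frame table with 2MB superpages under PAE and 4MB superpages otherwise. A trivial sketch of the two strides, assuming the conventional shift values (21 with PAE, 22 without).]

#include <stdio.h>

int main(void)
{
    printf("PAE step:     %lu bytes (2MB)\n", 1ul << 21);
    printf("non-PAE step: %lu bytes (4MB)\n", 1ul << 22);
    return 0;
}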
@@ -232,7 +234,7 @@ void arch_init_memory(void)
void write_ptbase(struct exec_domain *ed)
{
- write_cr3(pagetable_val(ed->arch.monitor_table));
+ write_cr3(pagetable_get_phys(ed->arch.monitor_table));
}
void invalidate_shadow_ldt(struct exec_domain *d)
@@ -375,7 +377,6 @@ static int get_page_and_type_from_pagenr(unsigned long page_nr,
return 1;
}
-
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
@@ -432,7 +433,6 @@ get_linear_pagetable(
return 1;
}
-
int
get_page_from_l1e(
l1_pgentry_t l1e, struct domain *d)
@@ -446,8 +446,7 @@ get_page_from_l1e(
if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 type settings %lx %lx", l1e_get_value(l1e),
- l1e_get_value(l1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -482,7 +481,7 @@ get_page_from_l1e(
static int
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn,
- struct domain *d, unsigned long va_idx)
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -493,45 +492,58 @@ get_page_from_l2e(
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L2 page type settings %lx",
- l2e_get_value(l2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
return 0;
}
+ vaddr >>= L2_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e),
- PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
+ l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
-#if defined(__i386__)
- return rc ? rc : get_linear_pagetable(l2e, pfn, d);
-#elif defined(__x86_64__)
- return rc;
+#if CONFIG_PAGING_LEVELS == 2
+ if (!rc)
+ rc = get_linear_pagetable(l2e, pfn, d);
#endif
+ return rc;
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+ l3_pgentry_t l3e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
ASSERT( !shadow_mode_refcounts(d) );
+ int rc;
+
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return 1;
if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L3 page type settings %lx",
- l3e_get_value(l3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
return 0;
}
- return get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d);
+ vaddr >>= L3_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l3e_get_pfn(l3e),
+ PGT_l2_page_table | vaddr, d);
+#if CONFIG_PAGING_LEVELS == 3
+ if (!rc)
+ rc = get_linear_pagetable(l3e, pfn, d);
+#endif
+ return rc;
}
+#endif /* 3 level */
+
+#if CONFIG_PAGING_LEVELS >= 4
static int
get_page_from_l4e(
@@ -546,8 +558,7 @@ get_page_from_l4e(
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L4 page type settings %lx",
- l4e_get_value(l4e) & L4_DISALLOW_MASK);
+ MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
return 0;
}
@@ -560,7 +571,7 @@ get_page_from_l4e(
return 1;
}
-#endif /* __x86_64__ */
+#endif /* 4 level */
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
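[Note: both getters above now record where the lower-level table is mapped: the virtual address is reduced to its directory slot index and parked in the PGT_va field of the page's type word, from which fixup_pae_vaddr() can later recover it. A round-trip sketch using the PGT_va_shift and PGT_va_mask values this patch sets in asm-x86/mm.h; the PAE L2 shift of 21 is an assumed conventional value.]

#include <stdio.h>

#define L2_PAGETABLE_SHIFT 21   /* assumption: PAE value */
#define PGT_va_shift       16   /* from asm-x86/mm.h after this patch */
#define PGT_va_mask        (((1u << 11) - 1) << PGT_va_shift)

int main(void)
{
    unsigned long vaddr = 0x12345678ul;

    /* Encode, as get_page_from_l2e() does. */
    unsigned int type = ((vaddr >> L2_PAGETABLE_SHIFT) << PGT_va_shift)
                        & PGT_va_mask;

    /* Decode the back-pointer again. */
    unsigned long back = (unsigned long)((type & PGT_va_mask) >> PGT_va_shift)
                         << L2_PAGETABLE_SHIFT;

    printf("slot=%lu back=%#lx\n", vaddr >> L2_PAGETABLE_SHIFT, back);
    return 0;
}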
@@ -622,7 +633,7 @@ static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
{
@@ -631,6 +642,9 @@ static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
put_page_and_type(&frame_table[l3e_get_pfn(l3e)]);
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
{
@@ -639,7 +653,7 @@ static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
put_page_and_type(&frame_table[l4e_get_pfn(l4e)]);
}
-#endif /* __x86_64__ */
+#endif
static int alloc_l1_table(struct pfn_info *page)
@@ -670,11 +684,61 @@ static int alloc_l1_table(struct pfn_info *page)
return 0;
}
+#ifdef CONFIG_X86_PAE
+static inline int fixup_pae_linear_mappings(l3_pgentry_t *pl3e)
+{
+ l2_pgentry_t *pl2e;
+ unsigned long vaddr;
+ int i,idx;
+
+ while ((unsigned long)pl3e & ~PAGE_MASK)
+ pl3e--;
+
+ if (!(l3e_get_flags(pl3e[3]) & _PAGE_PRESENT)) {
+ printk("Installing a L3 PAE pt without L2 in slot #3 isn't going to fly ...\n");
+ return 0;
+ }
-static int alloc_l2_table(struct pfn_info *page)
+ pl2e = map_domain_mem(l3e_get_phys(pl3e[3]));
+ for (i = 0; i < 4; i++) {
+ vaddr = LINEAR_PT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
+ idx = (vaddr >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
+ if (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) {
+ pl2e[idx] = l2e_create_phys(l3e_get_phys(pl3e[i]),
+ __PAGE_HYPERVISOR);
+ } else
+ pl2e[idx] = l2e_empty();
+ }
+ unmap_domain_mem(pl2e);
+
+ return 1;
+}
+
+static inline unsigned long fixup_pae_vaddr(unsigned long l2vaddr,
+ unsigned long l2type)
+{
+ unsigned long l3vaddr;
+
+ if ((l2type & PGT_va_mask) == PGT_va_unknown) {
+ printk("%s: hooking one l2 pt into multiple l3 slots isn't allowed, sorry\n",
+ __FUNCTION__);
+ domain_crash();
+ }
+ l3vaddr = ((l2type & PGT_va_mask) >> PGT_va_shift)
+ << L3_PAGETABLE_SHIFT;
+ return l3vaddr + l2vaddr;
+}
+
+#else
+# define fixup_pae_linear_mappings(unused) (1)
+# define fixup_pae_vaddr(vaddr, type) (vaddr)
+#endif
+
+static int alloc_l2_table(struct pfn_info *page, unsigned int type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
+ unsigned long vaddr;
l2_pgentry_t *pl2e;
int i;
@@ -682,21 +746,24 @@ static int alloc_l2_table(struct pfn_info *page)
if ( (PGT_base_page_table == PGT_l2_page_table) &&
unlikely(shadow_mode_refcounts(d)) )
return 1;
-
ASSERT( !shadow_mode_refcounts(d) );
+
pl2e = map_domain_mem(pfn << PAGE_SHIFT);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(i) &&
- unlikely(!get_page_from_l2e(pl2e[i], pfn, d, i)) )
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
+ vaddr = i << L2_PAGETABLE_SHIFT;
+ vaddr = fixup_pae_vaddr(vaddr,type);
+ if ( is_guest_l2_slot(type, i) &&
+ unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
goto fail;
+ }
-#if defined(__i386__)
+#if CONFIG_PAGING_LEVELS == 2
/* Xen private mappings. */
- memcpy(&pl2e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
l2e_create_pfn(pfn, __PAGE_HYPERVISOR);
pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
@@ -704,13 +771,31 @@ static int alloc_l2_table(struct pfn_info *page)
virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
__PAGE_HYPERVISOR);
#endif
+#if CONFIG_PAGING_LEVELS == 3
+ if (3 == ((type & PGT_va_mask) >> PGT_va_shift)) {
+ unsigned long v,src,dst;
+ void *virt;
+ /* Xen private mappings. */
+ dst = L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1);
+ src = L2_PAGETABLE_FIRST_XEN_SLOT;
+ memcpy(&pl2e[dst], &idle_pg_table_l2[src],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT)) {
+ dst = (v >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
+ virt = page_get_owner(page)->arch.mm_perdomain_pt + (v-PERDOMAIN_VIRT_START);
+ pl2e[dst] = l2e_create_page(virt_to_page(virt), __PAGE_HYPERVISOR);
+ }
+ /* see fixup_pae_linear_mappings() for linear pagetables */
+ }
+#endif
unmap_domain_mem(pl2e);
return 1;
fail:
while ( i-- > 0 )
- if ( is_guest_l2_slot(i) )
+ if ( is_guest_l2_slot(type, i) )
put_page_from_l2e(pl2e[i], pfn);
unmap_domain_mem(pl2e);
@@ -718,22 +803,29 @@ static int alloc_l2_table(struct pfn_info *page)
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static int alloc_l3_table(struct pfn_info *page)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
- l3_pgentry_t *pl3e = page_to_virt(page);
+ unsigned long vaddr;
+ l3_pgentry_t *pl3e;
int i;
ASSERT( !shadow_mode_refcounts(d) );
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ pl3e = map_domain_mem(pfn << PAGE_SHIFT);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) {
+ vaddr = i << L3_PAGETABLE_SHIFT;
if ( is_guest_l3_slot(i) &&
- unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+ unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
+ }
+ if (!fixup_pae_linear_mappings(pl3e))
+ goto fail;
+ unmap_domain_mem(pl3e);
return 1;
fail:
@@ -741,9 +833,13 @@ static int alloc_l3_table(struct pfn_info *page)
if ( is_guest_l3_slot(i) )
put_page_from_l3e(pl3e[i], pfn);
+ unmap_domain_mem(pl3e);
return 0;
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static int alloc_l4_table(struct pfn_info *page)
{
@@ -813,27 +909,35 @@ static void free_l2_table(struct pfn_info *page)
pl2e = map_domain_mem(pfn << PAGE_SHIFT);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(i) )
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
+ if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
put_page_from_l2e(pl2e[i], pfn);
+ }
unmap_domain_mem(pl2e);
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static void free_l3_table(struct pfn_info *page)
{
unsigned long pfn = page_to_pfn(page);
- l3_pgentry_t *pl3e = page_to_virt(page);
+ l3_pgentry_t *pl3e;
int i;
+ pl3e = map_domain_mem(pfn << PAGE_SHIFT);
+
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
if ( is_guest_l3_slot(i) )
put_page_from_l3e(pl3e[i], pfn);
+
+ unmap_domain_mem(pl3e);
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static void free_l4_table(struct pfn_info *page)
{
@@ -846,25 +950,24 @@ static void free_l4_table(struct pfn_info *page)
put_page_from_l4e(pl4e[i], pfn);
}
-#endif /* __x86_64__ */
-
+#endif
static inline int update_l1e(l1_pgentry_t *pl1e,
l1_pgentry_t ol1e,
l1_pgentry_t nl1e)
{
- /* FIXME: breaks with PAE */
- unsigned long o = l1e_get_value(ol1e);
- unsigned long n = l1e_get_value(nl1e);
+ intpte_t o = l1e_get_value(ol1e);
+ intpte_t n = l1e_get_value(nl1e);
if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
unlikely(o != l1e_get_value(ol1e)) )
{
- MEM_LOG("Failed to update %lx -> %lx: saw %lx",
- l1e_get_value(ol1e), l1e_get_value(nl1e), o);
+ MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "\n",
+ l1e_get_value(ol1e),
+ l1e_get_value(nl1e),
+ o);
return 0;
}
-
return 1;
}
@@ -885,8 +988,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 type settings %lx",
- l1e_get_value(nl1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 type settings %" PRIpte "\n",
+ (l1e_get_value(nl1e) & L1_DISALLOW_MASK));
return 0;
}
@@ -913,25 +1016,27 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
return 1;
}
-
#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
- unsigned long __o = cmpxchg((unsigned long *)(_p), \
- _t ## e_get_value(_o), \
- _t ## e_get_value(_n)); \
+ intpte_t __o = cmpxchg((intpte_t *)(_p), \
+ _t ## e_get_value(_o), \
+ _t ## e_get_value(_n)); \
if ( __o != _t ## e_get_value(_o) ) \
- MEM_LOG("Failed to update %lx -> %lx: saw %lx", \
- _t ## e_get_value(_o), _t ## e_get_value(_n), __o); \
+ MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte "", \
+ (_t ## e_get_value(_o)), \
+ (_t ## e_get_value(_n)), \
+ (__o)); \
(__o == _t ## e_get_value(_o)); })
-
/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
static int mod_l2_entry(l2_pgentry_t *pl2e,
l2_pgentry_t nl2e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned int type)
{
l2_pgentry_t ol2e;
+ unsigned long vaddr;
- if ( unlikely(!is_guest_l2_slot(pgentry_ptr_to_slot(pl2e))) )
+ if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
{
MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
return 0;
@@ -944,8 +1049,8 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- MEM_LOG("Bad L2 type settings %lx",
- l2e_get_value(nl2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 type settings %" PRIpte "\n",
+ (l2e_get_value(nl2e) & L2_DISALLOW_MASK));
return 0;
}
@@ -953,9 +1058,10 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
if ( !l2e_has_changed(&ol2e, &nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain,
- ((unsigned long)pl2e &
- ~PAGE_MASK) >> 2)) )
+ vaddr = (((unsigned long)pl2e & ~PAGE_MASK) / sizeof(l2_pgentry_t))
+ << L2_PAGETABLE_SHIFT;
+ vaddr = fixup_pae_vaddr(vaddr,type);
+ if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
@@ -975,7 +1081,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
static int mod_l3_entry(l3_pgentry_t *pl3e,
@@ -983,6 +1089,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
unsigned long pfn)
{
l3_pgentry_t ol3e;
+ unsigned long vaddr;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
{
@@ -997,8 +1104,8 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
{
if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
{
- MEM_LOG("Bad L3 type settings %lx",
- l3e_get_value(nl3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 type settings %" PRIpte "",
+ (u64)(l3e_get_value(nl3e) & L3_DISALLOW_MASK));
return 0;
}
@@ -1006,26 +1113,33 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
if (!l3e_has_changed(&ol3e, &nl3e, _PAGE_PRESENT))
return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
+ vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
+ << L3_PAGETABLE_SHIFT;
+ if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
return 0;
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
+ !fixup_pae_linear_mappings(pl3e)) )
{
put_page_from_l3e(nl3e, pfn);
return 0;
}
-
+
put_page_from_l3e(ol3e, pfn);
return 1;
}
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
+ !fixup_pae_linear_mappings(pl3e)) )
return 0;
put_page_from_l3e(ol3e, pfn);
return 1;
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
static int mod_l4_entry(l4_pgentry_t *pl4e,
@@ -1076,20 +1190,21 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
return 1;
}
-#endif /* __x86_64__ */
-
+#endif
int alloc_page_type(struct pfn_info *page, unsigned int type)
{
- switch ( type )
+ switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
return alloc_l1_table(page);
case PGT_l2_page_table:
- return alloc_l2_table(page);
-#ifdef __x86_64__
+ return alloc_l2_table(page, type);
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
return alloc_l3_table(page);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
return alloc_l4_table(page);
#endif
@@ -1124,7 +1239,7 @@ void free_page_type(struct pfn_info *page, unsigned int type)
}
}
- switch ( type )
+ switch (type & PGT_type_mask)
{
case PGT_l1_page_table:
free_l1_table(page);
@@ -1134,17 +1249,21 @@ void free_page_type(struct pfn_info *page, unsigned int type)
free_l2_table(page);
break;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
free_l3_table(page);
break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
free_l4_table(page);
break;
#endif
default:
+ printk("%s: type %x pfn %lx\n",__FUNCTION__,
+ type, page_to_pfn(page));
BUG();
}
}
@@ -1187,7 +1306,7 @@ void put_page_type(struct pfn_info *page)
x & ~PGT_validated)) != x) )
goto again;
/* We cleared the 'valid bit' so we do the clean up. */
- free_page_type(page, x & PGT_type_mask);
+ free_page_type(page, x);
/* Carry on, but with the 'valid bit' now clear. */
x &= ~PGT_validated;
nx &= ~PGT_validated;
@@ -1270,6 +1389,10 @@ int get_page_type(struct pfn_info *page, u32 type)
/* This table may be mapped at multiple locations. */
nx &= ~PGT_va_mask;
nx |= PGT_va_unknown;
+#if 0 /* debug */
+ printk("%s: pfn %lx type %x -> %x (tag as unknown)\n",
+ __FUNCTION__,page_to_pfn(page),x,nx);
+#endif
}
}
if ( unlikely(!(x & PGT_validated)) )
@@ -1286,7 +1409,7 @@ int get_page_type(struct pfn_info *page, u32 type)
if ( unlikely(!(nx & PGT_validated)) )
{
/* Try to validate page type; drop the new reference on failure. */
- if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) )
+ if ( unlikely(!alloc_page_type(page, type)) )
{
MEM_LOG("Error while validating pfn %lx for type %08x."
" caf=%08x taf=%08x",
@@ -1537,15 +1660,17 @@ int do_mmuext_op(
type = PGT_l2_page_table;
goto pin_page;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case MMUEXT_PIN_L3_TABLE:
type = PGT_l3_page_table;
goto pin_page;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case MMUEXT_PIN_L4_TABLE:
type = PGT_l4_page_table;
goto pin_page;
-#endif /* __x86_64__ */
+#endif
case MMUEXT_UNPIN_TABLE:
if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
@@ -1912,19 +2037,20 @@ int do_mmu_update(
break;
case PGT_l2_page_table:
ASSERT( !shadow_mode_refcounts(d) );
- if ( likely(get_page_type(page, PGT_l2_page_table)) )
+ if ( likely(get_page_type(
+ page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
l2_pgentry_t l2e;
/* FIXME: doesn't work with PAE */
l2e = l2e_create_phys(req.val, req.val);
- okay = mod_l2_entry(va, l2e, mfn);
+ okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
if ( okay && unlikely(shadow_mode_enabled(d)) )
shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
put_page_type(page);
}
break;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l3_page_table)) )
@@ -1939,6 +2065,8 @@ int do_mmu_update(
put_page_type(page);
}
break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l4_page_table)) )
@@ -1952,7 +2080,7 @@ int do_mmu_update(
put_page_type(page);
}
break;
-#endif /* __x86_64__ */
+#endif
default:
if ( likely(get_page_type(page, PGT_writable_page)) )
{
@@ -2119,9 +2247,10 @@ int update_grant_va_mapping(unsigned long va,
int do_update_va_mapping(unsigned long va,
- l1_pgentry_t val,
+ unsigned long val32,
unsigned long flags)
{
+ l1_pgentry_t val = l1e_create_phys(val32,val32);
struct exec_domain *ed = current;
struct domain *d = ed->domain;
unsigned int cpu = ed->processor;
@@ -2216,7 +2345,7 @@ int do_update_va_mapping(unsigned long va,
}
int do_update_va_mapping_otherdomain(unsigned long va,
- l1_pgentry_t val,
+ unsigned long val32,
unsigned long flags,
domid_t domid)
{
@@ -2234,7 +2363,7 @@ int do_update_va_mapping_otherdomain(unsigned long va,
return -ESRCH;
}
- rc = do_update_va_mapping(va, val, flags);
+ rc = do_update_va_mapping(va, val32, flags);
return rc;
}
@@ -2551,8 +2680,8 @@ void ptwr_flush(struct domain *d, const int which)
static int ptwr_emulated_update(
unsigned long addr,
- unsigned long old,
- unsigned long val,
+ physaddr_t old,
+ physaddr_t val,
unsigned int bytes,
unsigned int do_cmpxchg)
{
@@ -2570,21 +2699,22 @@ static int ptwr_emulated_update(
}
/* Turn a sub-word access into a full-word access. */
- /* FIXME: needs tweaks for PAE */
- if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
+ if (bytes != sizeof(physaddr_t))
{
int rc;
- unsigned long full;
- unsigned int mask = addr & ((BITS_PER_LONG/8)-1);
+ physaddr_t full;
+ unsigned int offset = addr & (sizeof(physaddr_t)-1);
+
/* Align address; read full word. */
- addr &= ~((BITS_PER_LONG/8)-1);
- if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) )
- return rc;
+ addr &= ~(sizeof(physaddr_t)-1);
+ if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
+ sizeof(physaddr_t))) )
+ return rc;
/* Mask out bits provided by caller. */
- full &= ~((1UL << (bytes*8)) - 1UL) << (mask*8);
+ full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
/* Shift the caller value and OR in the missing bits. */
- val &= (1UL << (bytes*8)) - 1UL;
- val <<= mask*8;
+ val &= (((physaddr_t)1 << (bytes*8)) - 1);
+ val <<= (offset)*8;
val |= full;
}
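[Note: the rewritten merge above widens a 1-, 2- or 4-byte guest write into a full physaddr_t-sized PTE update: read the aligned word, clear the bytes the guest supplies, shift the guest value into position and OR the remainder back in. A standalone worked example of the same masking arithmetic, assuming the 8-byte physaddr_t used under PAE.]

#include <stdint.h>
#include <stdio.h>

typedef uint64_t physaddr_t;  /* 8 bytes under PAE */

int main(void)
{
    physaddr_t full = 0x1122334455667788ull; /* current PTE contents  */
    physaddr_t val  = 0xaabb;                /* guest writes 2 bytes  */
    unsigned int bytes = 2, offset = 2;      /* at byte offset 2      */

    /* Mask out bits provided by caller. */
    full &= ~((((physaddr_t)1 << (bytes * 8)) - 1) << (offset * 8));
    /* Shift the caller value and OR in the missing bits. */
    val &= ((physaddr_t)1 << (bytes * 8)) - 1;
    val <<= offset * 8;
    val |= full;

    printf("merged = %#llx\n", (unsigned long long)val);
    /* prints 0x11223344aabb7788 */
    return 0;
}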
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index bd21c5bb18..ecfc988f05 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -87,7 +87,7 @@ extern unsigned long cpu0_stack[];
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-#if defined(CONFIG_X86_64)
+#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
index 83d7fc11b2..f14c6efbfb 100644
--- a/xen/arch/x86/shadow.c
+++ b/xen/arch/x86/shadow.c
@@ -358,13 +358,13 @@ free_shadow_hl2_table(struct domain *d, unsigned long smfn)
}
static void inline
-free_shadow_l2_table(struct domain *d, unsigned long smfn)
+free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
{
l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
int i, external = shadow_mode_external(d);
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( external || is_guest_l2_slot(i) )
+ if ( external || is_guest_l2_slot(type, i) )
if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
put_shadow_ref(l2e_get_pfn(pl2e[i]));
@@ -404,7 +404,7 @@ void free_shadow_page(unsigned long smfn)
case PGT_l2_shadow:
perfc_decr(shadow_l2_pages);
shadow_demote(d, gpfn, gmfn);
- free_shadow_l2_table(d, smfn);
+ free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
break;
case PGT_hl2_shadow:
@@ -573,7 +573,7 @@ static void free_shadow_pages(struct domain *d)
//
for_each_exec_domain(d, ed)
{
- if ( pagetable_val(ed->arch.shadow_table) )
+ if ( pagetable_get_phys(ed->arch.shadow_table) )
{
put_shadow_ref(pagetable_get_pfn(ed->arch.shadow_table));
ed->arch.shadow_table = mk_pagetable(0);
@@ -684,7 +684,7 @@ static void alloc_monitor_pagetable(struct exec_domain *ed)
struct pfn_info *mmfn_info;
struct domain *d = ed->domain;
- ASSERT(pagetable_val(ed->arch.monitor_table) == 0);
+ ASSERT(pagetable_get_phys(ed->arch.monitor_table) == 0);
mmfn_info = alloc_domheap_page(NULL);
ASSERT(mmfn_info != NULL);
@@ -705,7 +705,7 @@ static void alloc_monitor_pagetable(struct exec_domain *ed)
// map the phys_to_machine map into the Read-Only MPT space for this domain
mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
- l2e_create_phys(pagetable_val(d->arch.phys_table),
+ l2e_create_phys(pagetable_get_phys(d->arch.phys_table),
__PAGE_HYPERVISOR);
// Don't (yet) have mappings for these...
@@ -726,7 +726,7 @@ void free_monitor_pagetable(struct exec_domain *ed)
l2_pgentry_t *mpl2e, hl2e, sl2e;
unsigned long mfn;
- ASSERT( pagetable_val(ed->arch.monitor_table) );
+ ASSERT( pagetable_get_phys(ed->arch.monitor_table) );
mpl2e = ed->arch.monitor_vtable;
@@ -766,7 +766,7 @@ set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
struct map_dom_mem_cache *l2cache,
struct map_dom_mem_cache *l1cache)
{
- unsigned long phystab = pagetable_val(d->arch.phys_table);
+ unsigned long phystab = pagetable_get_phys(d->arch.phys_table);
l2_pgentry_t *l2, l2e;
l1_pgentry_t *l1;
struct pfn_info *l1page;
@@ -965,7 +965,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
{
if ( !(new_modes & SHM_external) )
{
- ASSERT( !pagetable_val(d->arch.phys_table) );
+ ASSERT( !pagetable_get_phys(d->arch.phys_table) );
if ( !alloc_p2m_table(d) )
{
printk("alloc_p2m_table failed (out-of-memory?)\n");
@@ -1051,7 +1051,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
d->arch.shadow_dirty_bitmap = NULL;
}
if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
- pagetable_val(d->arch.phys_table) )
+ pagetable_get_phys(d->arch.phys_table) )
{
free_p2m_table(d);
}
@@ -1093,7 +1093,8 @@ translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
// up dom0.
//
void
-translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn)
+translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
+ unsigned int type)
{
int i;
l2_pgentry_t *l2;
@@ -1103,7 +1104,7 @@ translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn)
l2 = map_domain_mem(l2mfn << PAGE_SHIFT);
for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
{
- if ( is_guest_l2_slot(i) &&
+ if ( is_guest_l2_slot(type, i) &&
(l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
{
unsigned long mfn = l2e_get_pfn(l2[i]);
@@ -1403,13 +1404,13 @@ gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
perfc_incrc(gpfn_to_mfn_foreign);
unsigned long va = gpfn << PAGE_SHIFT;
- unsigned long phystab = pagetable_val(d->arch.phys_table);
+ unsigned long phystab = pagetable_get_phys(d->arch.phys_table);
l2_pgentry_t *l2 = map_domain_mem(phystab);
l2_pgentry_t l2e = l2[l2_table_offset(va)];
unmap_domain_mem(l2);
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
- printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%lx\n",
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
d->domain_id, gpfn, l2e_get_value(l2e));
return INVALID_MFN;
}
@@ -1425,7 +1426,7 @@ gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
{
- printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%lx\n",
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
d->domain_id, gpfn, l1e_get_value(l1e));
return INVALID_MFN;
}
@@ -1540,7 +1541,7 @@ static unsigned long shadow_l2_table(
unsigned long hl2mfn;
spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
- l2e_create_phys(pagetable_val(d->arch.phys_table),
+ l2e_create_phys(pagetable_get_phys(d->arch.phys_table),
__PAGE_HYPERVISOR);
if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
@@ -2391,7 +2392,10 @@ static int resync_all(struct domain *d, u32 stype)
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( !is_guest_l2_slot(i) && !external )
+#if CONFIG_X86_PAE
+ BUG(); /* FIXME: need type_info */
+#endif
+ if ( !is_guest_l2_slot(0,i) && !external )
continue;
l2_pgentry_t new_pde = guest2[i];
@@ -2434,7 +2438,10 @@ static int resync_all(struct domain *d, u32 stype)
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( !is_guest_l2_slot(i) && !external )
+#if CONFIG_X86_PAE
+ BUG(); /* FIXME: need type_info */
+#endif
+ if ( !is_guest_l2_slot(0, i) && !external )
continue;
l2_pgentry_t new_pde = guest2[i];
@@ -2647,8 +2654,8 @@ int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
&gpte, sizeof(gpte))) )
{
printk("%s() failed, crashing domain %d "
- "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
- __func__, d->domain_id, l2e_get_value(gpde), va);
+ "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
+ __func__,d->domain_id, l2e_get_value(gpde), va);
domain_crash_synchronous();
}
@@ -2721,7 +2728,7 @@ void shadow_l2_normal_pt_update(
shadow_unlock(d);
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
void shadow_l3_normal_pt_update(
struct domain *d,
unsigned long pa, l3_pgentry_t gpde,
@@ -2729,7 +2736,9 @@ void shadow_l3_normal_pt_update(
{
BUG(); // not yet implemented
}
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
void shadow_l4_normal_pt_update(
struct domain *d,
unsigned long pa, l4_pgentry_t gpde,
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 7907fe269d..989a07a381 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -797,7 +797,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
break;
case 3: /* Read CR3 */
- *reg = pagetable_val(ed->arch.guest_table);
+ *reg = pagetable_get_phys(ed->arch.guest_table);
break;
default:
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
index 895c8c11ce..ef51424275 100644
--- a/xen/arch/x86/vmx.c
+++ b/xen/arch/x86/vmx.c
@@ -567,7 +567,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c)
if (!vmx_paging_enabled(d)) {
VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
- __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table));
goto skip_cr3;
}
@@ -578,7 +578,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c)
* We simply invalidate the shadow.
*/
mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
- if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) {
+ if (mfn != pagetable_get_pfn(d->arch.guest_table)) {
printk("Invalid CR3 value=%lx", c->cr3);
domain_crash_synchronous();
return 0;
@@ -603,7 +603,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c)
*/
d->arch.arch_vmx.cpu_cr3 = c->cr3;
VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", c->cr3);
- __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
}
skip_cr3:
@@ -769,7 +769,7 @@ static int vmx_set_cr0(unsigned long value)
VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
(unsigned long) (mfn << PAGE_SHIFT));
- __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
/*
* arch->shadow_table should hold the next CR3 for shadow
*/
@@ -869,7 +869,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
* We simply invalidate the shadow.
*/
mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
- if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table))
+ if (mfn != pagetable_get_pfn(d->arch.guest_table))
__vmx_bug(regs);
shadow_sync_all(d->domain);
} else {
@@ -896,7 +896,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
d->arch.arch_vmx.cpu_cr3 = value;
VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
value);
- __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
}
break;
}
diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c
index dc18839d2d..3c49b7b905 100644
--- a/xen/arch/x86/vmx_io.c
+++ b/xen/arch/x86/vmx_io.c
@@ -466,12 +466,12 @@ void vmx_do_resume(struct exec_domain *d)
{
vmx_stts();
if ( vmx_paging_enabled(d) )
- __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->arch.shadow_table));
else
// paging is not enabled in the guest
- __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(d->domain->arch.phys_table));
- __vmwrite(HOST_CR3, pagetable_val(d->arch.monitor_table));
+ __vmwrite(HOST_CR3, pagetable_get_phys(d->arch.monitor_table));
__vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
if (event_pending(d)) {
diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c
index a770855c6a..b21db2a40a 100644
--- a/xen/arch/x86/vmx_vmcs.c
+++ b/xen/arch/x86/vmx_vmcs.c
@@ -196,8 +196,8 @@ void vmx_do_launch(struct exec_domain *ed)
error |= __vmwrite(GUEST_TR_BASE, 0);
error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
- __vmwrite(GUEST_CR3, pagetable_val(ed->arch.guest_table));
- __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
+ __vmwrite(GUEST_CR3, pagetable_get_phys(ed->arch.guest_table));
+ __vmwrite(HOST_CR3, pagetable_get_phys(ed->arch.monitor_table));
__vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
ed->arch.schedule_tail = arch_vmx_do_resume;
diff --git a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c
index 3c805016e8..8f3a813f5e 100644
--- a/xen/arch/x86/x86_32/domain_page.c
+++ b/xen/arch/x86/x86_32/domain_page.c
@@ -72,7 +72,7 @@ void *map_domain_mem(unsigned long pa)
shadow_epoch[cpu] = ++epoch;
}
}
- while ( l1e_get_value(cache[idx]) != 0 );
+ while ( l1e_get_flags(cache[idx]) & _PAGE_PRESENT );
cache[idx] = l1e_create_phys(pa, __PAGE_HYPERVISOR);
diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c
index 902a9c64d4..0bbdabb1d6 100644
--- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c
@@ -27,6 +27,8 @@
#include <asm/fixmap.h>
#include <asm/domain_page.h>
+static unsigned long mpt_size;
+
struct pfn_info *alloc_xen_pagetable(void)
{
extern int early_boot;
@@ -51,69 +53,102 @@ void free_xen_pagetable(struct pfn_info *pg)
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
- return &idle_pg_table[l2_table_offset(v)];
+ return &idle_pg_table_l2[l2_linear_offset(v)];
}
void __init paging_init(void)
{
void *ioremap_pt;
- unsigned long v;
- struct pfn_info *m2p_pg;
+ unsigned long v,v2,i;
+ struct pfn_info *pg;
- /* Allocate and map the machine-to-phys table. */
- if ( (m2p_pg = alloc_domheap_pages(NULL, 10)) == NULL )
- panic("Not enough memory to bootstrap Xen.\n");
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
- l2e_create_page(m2p_pg, __PAGE_HYPERVISOR | _PAGE_PSE);
- memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);
+#ifdef CONFIG_X86_PAE
+ printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
+#else
+ printk("PAE disabled.\n");
+#endif
+
+ idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+
+ /* Allocate and map the machine-to-phys table and create read-only
+ * mapping of MPT for guest-OS use. Without PAE we'll end up with
+ * one 4MB page, with PAE we'll allocate 2MB pages depending on
+ * the amount of memory installed, but at least 4MB to cover 4GB
+ * address space. This is needed to make PCI I/O memory address
+ * lookups work in guests. -- kraxel */
+ mpt_size = max_page * 4;
+ if (mpt_size < 4*1024*1024)
+ mpt_size = 4*1024*1024;
+ for (v = RDWR_MPT_VIRT_START, v2 = RO_MPT_VIRT_START;
+ v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START);
+ v += (1 << L2_PAGETABLE_SHIFT), v2 += (1 << L2_PAGETABLE_SHIFT)) {
+ if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+ panic("Not enough memory to bootstrap Xen.\n");
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_page(pg, __PAGE_HYPERVISOR | _PAGE_PSE);
+ idle_pg_table_l2[l2_linear_offset(v2)] =
+ l2e_create_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
+ }
+ memset((void *)RDWR_MPT_VIRT_START, 0x55, mpt_size);
- /* Xen 4MB mappings can all be GLOBAL. */
+ /* Xen 2/4MB mappings can all be GLOBAL. */
if ( cpu_has_pge )
{
- for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
- {
- if (l2e_get_flags(idle_pg_table[l2_table_offset(v)]) & _PAGE_PSE)
- l2e_add_flags(&idle_pg_table[l2_table_offset(v)],
- _PAGE_GLOBAL);
+ for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) {
+ if (!(l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) & _PAGE_PSE))
+ continue;
+ if (v >= RO_MPT_VIRT_START && v < RO_MPT_VIRT_END)
+ continue;
+ l2e_add_flags(&idle_pg_table_l2[l2_linear_offset(v)],
+ _PAGE_GLOBAL);
}
}
- /* Create page table for ioremap(). */
- ioremap_pt = (void *)alloc_xenheap_page();
- clear_page(ioremap_pt);
- idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
- l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
-
- /*
- * Create read-only mapping of MPT for guest-OS use.
- * NB. Remove the global bit so that shadow_mode_translate()==true domains
- * can reused this address space for their phys-to-machine mapping.
- */
- idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
- l2e_create_page(m2p_pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
+ /* Create page table(s) for ioremap(). */
+ for (v = IOREMAP_VIRT_START; v != IOREMAP_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
+ ioremap_pt = (void *)alloc_xenheap_page();
+ clear_page(ioremap_pt);
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
+ }
/* Set up mapping cache for domain pages. */
- mapcache = (l1_pgentry_t *)alloc_xenheap_page();
- clear_page(mapcache);
- idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
- l2e_create_page(virt_to_page(mapcache), __PAGE_HYPERVISOR);
-
- /* Set up linear page table mapping. */
- idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l2e_create_page(virt_to_page(idle_pg_table), __PAGE_HYPERVISOR);
+ mapcache = (l1_pgentry_t*)alloc_xenheap_pages(10-PAGETABLE_ORDER);
+ for (v = MAPCACHE_VIRT_START, i = 0;
+ v != MAPCACHE_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT), i++) {
+ clear_page(mapcache + i*L1_PAGETABLE_ENTRIES);
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_page(virt_to_page(mapcache + i*L1_PAGETABLE_ENTRIES),
+ __PAGE_HYPERVISOR);
+ }
+
+ for (v = LINEAR_PT_VIRT_START; v != LINEAR_PT_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_page(virt_to_page(idle_pg_table_l2 + ((v-RDWR_MPT_VIRT_START) >> PAGETABLE_ORDER)),
+ __PAGE_HYPERVISOR);
+ }
}
-void __init zap_low_mappings(void)
+void __init zap_low_mappings(l2_pgentry_t *base)
{
int i;
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] = l2e_empty();
+ u32 addr;
+
+ for (i = 0; ; i++) {
+ addr = (i << L2_PAGETABLE_SHIFT);
+ if (addr >= HYPERVISOR_VIRT_START)
+ break;
+ if (l2e_get_phys(base[i]) != addr)
+ continue;
+ base[i] = l2e_empty();
+ }
flush_tlb_all_pge();
}
void subarch_init_memory(struct domain *dom_xen)
{
- unsigned long i, m2p_start_mfn;
+ unsigned long i, v, m2p_start_mfn;
/*
* We are rather picky about the layout of 'struct pfn_info'. The
@@ -129,19 +164,24 @@ void subarch_init_memory(struct domain *dom_xen)
offsetof(struct pfn_info, count_info),
offsetof(struct pfn_info, u.inuse._domain),
sizeof(struct pfn_info));
- for ( ; ; ) ;
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
}
/* M2P table is mappable read-only by privileged domains. */
- m2p_start_mfn = l2e_get_pfn(
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
- for ( i = 0; i < 1024; i++ )
- {
- frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
- /* gdt to make sure it's only mapped read-only by non-privileged
- domains. */
- frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
- page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
+ for (v = RDWR_MPT_VIRT_START;
+ v != RDWR_MPT_VIRT_END && mpt_size > (v - RDWR_MPT_VIRT_START);
+ v += (1 << L2_PAGETABLE_SHIFT)) {
+ m2p_start_mfn = l2e_get_pfn(
+ idle_pg_table_l2[l2_linear_offset(v)]);
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+ frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ /* gdt to make sure it's only mapped read-only by non-privileged
+ domains. */
+ frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
+ page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
+ }
}
}
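[Note: the sizing rule in paging_init() earlier in this file is 4 bytes of machine-to-phys entry per machine page, with a 4MB floor so a slot exists for the whole 4GB address space. A worked example of the arithmetic for a 4GB machine; a sketch, where max_page = 2^20 assumes 4KB pages.]

#include <stdio.h>

int main(void)
{
    unsigned long max_page = 1ul << 20;    /* 4GB of RAM in 4KB pages */
    unsigned long mpt_size = max_page * 4; /* 4 bytes per entry = 4MB */
    if (mpt_size < 4ul * 1024 * 1024)
        mpt_size = 4ul * 1024 * 1024;      /* floor: one 4MB mapping */
    printf("mpt_size = %lu MB\n", mpt_size >> 20);
    return 0;
}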
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
index f545efcba4..8bb876f3f3 100644
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -92,21 +92,24 @@ void show_registers(struct cpu_user_regs *regs)
void show_page_walk(unsigned long addr)
{
- unsigned long page;
+ l2_pgentry_t pmd;
+ l1_pgentry_t *pte;
if ( addr < PAGE_OFFSET )
return;
printk("Pagetable walk from %08lx:\n", addr);
- page = l2e_get_value(idle_pg_table[l2_table_offset(addr)]);
- printk(" L2 = %08lx %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : "");
- if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ pmd = idle_pg_table_l2[l2_linear_offset(addr)];
+ printk(" L2 = %08llx %s\n", (u64)l2e_get_value(pmd),
+ (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
+ if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
+ (l2e_get_flags(pmd) & _PAGE_PSE) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
- printk(" L1 = %08lx\n", page);
+ pte = __va(l2e_get_phys(pmd));
+ pte += l1_table_offset(addr);
+ printk(" L1 = %08llx\n", (u64)l1e_get_value(*pte));
}
#define DOUBLEFAULT_STACK_SIZE 1024
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 7fcb8cb51c..147d4eed31 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -79,6 +79,8 @@ void __init paging_init(void)
l2_pgentry_t *l2_ro_mpt;
struct pfn_info *pg;
+ idle0_exec_domain.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+
/* Create user-accessible L2 directory to map the MPT for guests. */
l3_ro_mpt = (l3_pgentry_t *)alloc_xenheap_page();
clear_page(l3_ro_mpt);
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 4f3925e894..48fe47a953 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -7,6 +7,19 @@
#ifndef __X86_CONFIG_H__
#define __X86_CONFIG_H__
+#if defined(__i386__)
+// # define CONFIG_X86_PAE 1 /* yes */
+ # undef CONFIG_X86_PAE /* no */
+#endif
+
+#if defined(__x86_64__)
+# define CONFIG_PAGING_LEVELS 4
+#elif defined(CONFIG_X86_PAE)
+# define CONFIG_PAGING_LEVELS 3
+#else
+# define CONFIG_PAGING_LEVELS 2
+#endif
+
#define CONFIG_X86 1
#define CONFIG_X86_HT 1
#define CONFIG_SHADOW 1
@@ -189,7 +202,7 @@ extern unsigned long _end; /* standard ELF symbol */
* Per-domain mappings ( 4MB)
* Shadow linear pagetable ( 4MB) ( 8MB)
* Guest linear pagetable ( 4MB) ( 8MB)
- * Machine-to-physical translation table [writable] ( 4MB)
+ * Machine-to-physical translation table [writable] ( 4MB) (16MB)
* Frame-info table (24MB) (96MB)
* * Start of guest inaccessible area
* Machine-to-physical translation table [read-only] ( 4MB)
@@ -203,8 +216,8 @@ extern unsigned long _end; /* standard ELF symbol */
#ifdef CONFIG_X86_PAE
# define LINEARPT_MBYTES 8
-# define MACHPHYS_MBYTES 4 /* KAF: This needs to be bigger */
-# define FRAMETABLE_MBYTES 96 /* 16 GB mem limit (total) */
+# define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
+# define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
#else
# define LINEARPT_MBYTES 4
# define MACHPHYS_MBYTES 4
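The new sizing comment is easy to verify: 1GB of physical memory is 262,144 4KB frames, and each writable M2P entry is a 4-byte word, so each 1GB costs exactly 1MB of table and 16MB covers the 16GB PAE target. The frame table scales the same way; assuming sizeof(struct pfn_info) is about 24 bytes in this tree, it needs six times the M2P footprint, hence the factor of 6:

    /* Back-of-envelope check of MACHPHYS_MBYTES and FRAMETABLE_MBYTES.
     * The 4-byte M2P entry and ~24-byte struct pfn_info are assumptions
     * about this tree, stated here for the arithmetic only. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long frames_per_gb = (1UL << 30) >> 12;       /* 262144 */

        printf("M2P per GB   : %lu MB\n", (frames_per_gb * 4) >> 20);
        printf("M2P for 16GB : %lu MB\n", (16 * frames_per_gb * 4) >> 20);
        printf("frames, 16GB : %lu MB\n", (16 * frames_per_gb * 24) >> 20);
        return 0;
    }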
@@ -237,21 +250,21 @@ extern unsigned long _end; /* standard ELF symbol */
#define GUEST_SEGMENT_MAX_ADDR RO_MPT_VIRT_END
#ifdef CONFIG_X86_PAE
-/* Hypervisor owns top 144MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xF7000000
-# define HYPERVISOR_VIRT_START (0xF7000000UL)
+/* Hypervisor owns top 168MB of virtual address space. */
+# define __HYPERVISOR_VIRT_START 0xF5800000
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
#else
/* Hypervisor owns top 64MB of virtual address space. */
# define __HYPERVISOR_VIRT_START 0xFC000000
# define HYPERVISOR_VIRT_START (0xFC000000UL)
#endif
-#define ROOT_PAGETABLE_FIRST_XEN_SLOT \
+#define L2_PAGETABLE_FIRST_XEN_SLOT \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
-#define ROOT_PAGETABLE_LAST_XEN_SLOT \
+#define L2_PAGETABLE_LAST_XEN_SLOT \
(~0UL >> L2_PAGETABLE_SHIFT)
-#define ROOT_PAGETABLE_XEN_SLOTS \
- (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
+#define L2_PAGETABLE_XEN_SLOTS \
+ (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1)
#define PGT_base_page_table PGT_l2_page_table
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 98da7e017a..7c9aa59c62 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -119,12 +119,6 @@ struct arch_exec_domain
unsigned long shadow_ldt_mapcnt;
} __cacheline_aligned;
-#define IDLE0_ARCH_EXEC_DOMAIN \
-{ \
- perdomain_ptes: 0, \
- monitor_table: mk_pagetable(__pa(idle_pg_table)) \
-}
-
#endif /* __ASM_DOMAIN_H__ */
/*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e8a9617980..ac0d3cd40b 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -76,15 +76,15 @@ struct pfn_info
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift 17
-#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift)
+ /* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 16
+#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<17)-1)
+#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<16)-1)
#define PGT_mfn_mask ((1U<<20)-1) /* mfn mask for shadow types */
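The repacking trades one bit of the type-use count for one extra bit of back pointer: type_info now carries a 16-bit count in bits 0-15 and an 11-bit virtual-address field in bits 16-26. Eleven bits are what PAE needs, since its linear L2 space is 4 tables of 512 entries, i.e. 2048 slots, one more bit than the 1024 slots of a 2-level build. A sketch of packing under the new layout:

    /* Sketch: encode an L2 slot back pointer plus a use count into
     * type_info under the 16/11-bit split introduced above. */
    #define PGT_va_shift   16
    #define PGT_va_mask    (((1U << 11) - 1) << PGT_va_shift)
    #define PGT_count_mask ((1U << 16) - 1)

    static unsigned int pack_type_info(unsigned int va_slot,
                                       unsigned int count)
    {
        return ((va_slot << PGT_va_shift) & PGT_va_mask) |
               (count & PGT_count_mask);
    }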
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 30f1a8fe93..e82303b6e7 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -8,23 +8,132 @@
#define PAGE_SIZE (1 << PAGE_SHIFT)
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_FLAG_MASK (~0U)
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
#if defined(__i386__)
-#include <asm/x86_32/page.h>
+# include <asm/x86_32/page.h>
#elif defined(__x86_64__)
-#include <asm/x86_64/page.h>
+# include <asm/x86_64/page.h>
#endif
+/* Get pte contents as an integer (intpte_t). */
+#define l1e_get_value(x) ((x).l1)
+#define l2e_get_value(x) ((x).l2)
+#define l3e_get_value(x) ((x).l3)
+#define l4e_get_value(x) ((x).l4)
+
+/* Get pfn mapped by pte (unsigned long). */
+#define l1e_get_pfn(x) \
+ ((unsigned long)(((x).l1 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l2e_get_pfn(x) \
+ ((unsigned long)(((x).l2 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l3e_get_pfn(x) \
+ ((unsigned long)(((x).l3 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l4e_get_pfn(x) \
+ ((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+
+/* Get physical address of page mapped by pte (physaddr_t). */
+#define l1e_get_phys(x) \
+ ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_phys(x) \
+ ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+#define l3e_get_phys(x) \
+ ((physaddr_t)(((x).l3 & (PADDR_MASK&PAGE_MASK))))
+#define l4e_get_phys(x) \
+ ((physaddr_t)(((x).l4 & (PADDR_MASK&PAGE_MASK))))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags(x) (get_pte_flags((x).l1))
+#define l2e_get_flags(x) (get_pte_flags((x).l2))
+#define l3e_get_flags(x) (get_pte_flags((x).l3))
+#define l4e_get_flags(x) (get_pte_flags((x).l4))
+
+/* Construct an empty pte. */
+#define l1e_empty() ((l1_pgentry_t) { 0 })
+#define l2e_empty() ((l2_pgentry_t) { 0 })
+#define l3e_empty() ((l3_pgentry_t) { 0 })
+#define l4e_empty() ((l4_pgentry_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_create_pfn(pfn, flags) \
+ ((l1_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l2e_create_pfn(pfn, flags) \
+ ((l2_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l3e_create_pfn(pfn, flags) \
+ ((l3_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l4e_create_pfn(pfn, flags) \
+ ((l4_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+
+/* Construct a pte from a physical address and access flags. */
+#define l1e_create_phys(pa, flags) \
+ ((l1_pgentry_t) { (pa) | put_pte_flags(flags) })
+#define l2e_create_phys(pa, flags) \
+ ((l2_pgentry_t) { (pa) | put_pte_flags(flags) })
+#define l3e_create_phys(pa, flags) \
+ ((l3_pgentry_t) { (pa) | put_pte_flags(flags) })
+#define l4e_create_phys(pa, flags) \
+ ((l4_pgentry_t) { (pa) | put_pte_flags(flags) })
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags(x, flags) ((x)->l1 |= put_pte_flags(flags))
+#define l2e_add_flags(x, flags) ((x)->l2 |= put_pte_flags(flags))
+#define l3e_add_flags(x, flags) ((x)->l3 |= put_pte_flags(flags))
+#define l4e_add_flags(x, flags) ((x)->l4 |= put_pte_flags(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags(x, flags) ((x)->l1 &= ~put_pte_flags(flags))
+#define l2e_remove_flags(x, flags) ((x)->l2 &= ~put_pte_flags(flags))
+#define l3e_remove_flags(x, flags) ((x)->l3 &= ~put_pte_flags(flags))
+#define l4e_remove_flags(x, flags) ((x)->l4 &= ~put_pte_flags(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed(x,y,flags) \
+ ( !!(((x)->l1 ^ (y)->l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l2e_has_changed(x,y,flags) \
+ ( !!(((x)->l2 ^ (y)->l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l3e_has_changed(x,y,flags) \
+ ( !!(((x)->l3 ^ (y)->l3) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l4e_has_changed(x,y,flags) \
+ ( !!(((x)->l4 ^ (y)->l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+
+/* Pagetable walking. */
+#define l2e_to_l1e(x) ((l1_pgentry_t *)__va(l2e_get_phys(x)))
+#define l3e_to_l2e(x) ((l2_pgentry_t *)__va(l3e_get_phys(x)))
+#define l4e_to_l3e(x) ((l3_pgentry_t *)__va(l4e_get_phys(x)))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(a) \
+ (((a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(a) \
+ (((a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(a) \
+ (((a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(a) \
+ (((a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+
/* Convert a pointer to a page-table entry into pagetable slot index. */
#define pgentry_ptr_to_slot(_p) \
(((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
/* Page-table type. */
#ifndef __ASSEMBLY__
-typedef struct { unsigned long pt_lo; } pagetable_t;
-#define pagetable_val(_x) ((_x).pt_lo)
-#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT)
-#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+#if CONFIG_PAGING_LEVELS == 2
+/* x86_32 default */
+typedef struct { u32 pfn; } pagetable_t;
+#elif CONFIG_PAGING_LEVELS == 3
+/* x86_32 PAE */
+typedef struct { u32 pfn; } pagetable_t;
+#elif CONFIG_PAGING_LEVELS == 4
+/* x86_64 */
+typedef struct { u64 pfn; } pagetable_t;
+#endif
+#define pagetable_get_phys(_x) ((physaddr_t)(_x).pfn << PAGE_SHIFT)
+#define pagetable_get_pfn(_x) ((_x).pfn)
+#define mk_pagetable(_phys) ({ pagetable_t __p; __p.pfn = (_phys) >> PAGE_SHIFT; __p; })
#endif
#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
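This block is the heart of the patch: page-table entries become opaque single-member structs, and every read, construction, and flag update goes through a level-tagged macro, so the 2-level, PAE, and 4-level builds differ only in intpte_t and the flag packing behind get_pte_flags()/put_pte_flags(). A hedged round-trip through the new interface:

    /* Sketch: construct, inspect and mutate an L1 entry via the new
     * macros; PRIpte comes from the per-build page header. */
    l1_pgentry_t pte = l1e_create_pfn(0x1234, __PAGE_HYPERVISOR);

    ASSERT(l1e_get_pfn(pte) == 0x1234);
    ASSERT(l1e_get_flags(pte) & _PAGE_PRESENT);

    l1e_add_flags(&pte, _PAGE_GLOBAL);     /* mutators take a pointer */
    l1e_remove_flags(&pte, _PAGE_RW);
    printk("pte = %" PRIpte "\n", l1e_get_value(pte));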
@@ -49,6 +158,7 @@ typedef struct { unsigned long pt_lo; } pagetable_t;
#define l4e_create_page(_x,_y) (l4e_create_pfn(page_to_pfn(_x),(_y)))
/* High table entries are reserved by the hypervisor. */
+/* FIXME: this breaks with PAE -- kraxel */
#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
@@ -78,7 +188,14 @@ typedef struct { unsigned long pt_lo; } pagetable_t;
(l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
#ifndef __ASSEMBLY__
+#if CONFIG_PAGING_LEVELS == 3
+extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l3_pgentry_t idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
+#else
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#endif
extern void paging_init(void);
#endif
@@ -96,17 +213,17 @@ extern void paging_init(void);
: : "r" (mmu_cr4_features) ); \
} while ( 0 )
-#define _PAGE_PRESENT 0x001UL
-#define _PAGE_RW 0x002UL
-#define _PAGE_USER 0x004UL
-#define _PAGE_PWT 0x008UL
-#define _PAGE_PCD 0x010UL
-#define _PAGE_ACCESSED 0x020UL
-#define _PAGE_DIRTY 0x040UL
-#define _PAGE_PAT 0x080UL
-#define _PAGE_PSE 0x080UL
-#define _PAGE_GLOBAL 0x100UL
-#define _PAGE_AVAIL 0xe00UL
+#define _PAGE_PRESENT 0x001U
+#define _PAGE_RW 0x002U
+#define _PAGE_USER 0x004U
+#define _PAGE_PWT 0x008U
+#define _PAGE_PCD 0x010U
+#define _PAGE_ACCESSED 0x020U
+#define _PAGE_DIRTY 0x040U
+#define _PAGE_PAT 0x080U
+#define _PAGE_PSE 0x080U
+#define _PAGE_GLOBAL 0x100U
+#define _PAGE_AVAIL 0xE00U
#define __PAGE_HYPERVISOR \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 42a9c1e6f1..665e5e9026 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -130,10 +130,12 @@ extern void shadow_l1_normal_pt_update(struct domain *d,
extern void shadow_l2_normal_pt_update(struct domain *d,
unsigned long pa, l2_pgentry_t l2e,
struct map_dom_mem_cache *cache);
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
extern void shadow_l3_normal_pt_update(struct domain *d,
unsigned long pa, l3_pgentry_t l3e,
struct map_dom_mem_cache *cache);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
extern void shadow_l4_normal_pt_update(struct domain *d,
unsigned long pa, l4_pgentry_t l4e,
struct map_dom_mem_cache *cache);
@@ -1682,7 +1684,7 @@ static inline void update_pagetables(struct exec_domain *ed)
// HACK ALERT: there's currently no easy way to figure out if a domU
// has set its arch.guest_table to zero, vs not yet initialized it.
//
- paging_enabled = !!pagetable_val(ed->arch.guest_table);
+ paging_enabled = !!pagetable_get_phys(ed->arch.guest_table);
/*
* We don't call __update_pagetables() when vmx guest paging is
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 336cab889f..08995644e8 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -39,7 +39,12 @@ extern cpumask_t cpu_sibling_map[];
extern void smp_flush_tlb(void);
extern void smp_invalidate_rcv(void); /* Process an NMI */
extern void (*mtrr_hook) (void);
-extern void zap_low_mappings (void);
+
+#ifdef CONFIG_X86_64
+extern void zap_low_mappings(void);
+#else
+extern void zap_low_mappings(l2_pgentry_t *base);
+#endif
#define MAX_APICID 256
extern u8 x86_cpu_to_apicid[];
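On x86_32 callers must now say which L2 table to scrub of its low 1:1 mappings, since with PAE the idle L2 is no longer the same object as the root table. Presumably the boot path ends up with something like this (a sketch, not the literal call site):

    #ifdef CONFIG_X86_64
        zap_low_mappings();                  /* signature unchanged */
    #else
        zap_low_mappings(idle_pg_table_l2);  /* name the L2 to clear */
    #endif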
diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h
index 9bb1f6ec85..5dd6c6c1af 100644
--- a/xen/include/asm-x86/types.h
+++ b/xen/include/asm-x86/types.h
@@ -44,11 +44,17 @@ typedef signed long long s64;
typedef unsigned long long u64;
#define BITS_PER_LONG 32
typedef unsigned int size_t;
+#if defined(CONFIG_X86_PAE)
+typedef u64 physaddr_t;
+#else
+typedef u32 physaddr_t;
+#endif
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
#define BITS_PER_LONG 64
typedef unsigned long size_t;
+typedef u64 physaddr_t;
#endif
/* DMA addresses come in generic and 64-bit flavours. */
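physaddr_t is the one scalar that changes width under PAE: frame numbers still fit in an unsigned long, but a physical address can exceed 4GB on a 32-bit build, so code that prints or stores one must be 64-bit clean. The pattern show_page_walk() uses above, sketched in isolation ('pmd' is assumed in scope):

    /* physaddr_t may be u32 or u64 depending on CONFIG_X86_PAE, so
     * widen to u64 before handing it to a fixed-width format. */
    physaddr_t pa = l2e_get_phys(pmd);
    printk("phys = %016llx\n", (u64)pa);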
diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h
new file mode 100644
index 0000000000..2c029b2317
--- /dev/null
+++ b/xen/include/asm-x86/x86_32/page-2level.h
@@ -0,0 +1,49 @@
+#ifndef __X86_32_PAGE_2L_H__
+#define __X86_32_PAGE_2L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 10
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
+
+#define PADDR_BITS 32
+#define PADDR_MASK (~0UL)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
+/* read access (should only be used for debug printk's) */
+typedef u32 intpte_t;
+#define PRIpte "08x"
+
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef l2_pgentry_t root_pgentry_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l2e_get_pfn
+#define root_get_flags l2e_get_flags
+#define root_get_value l2e_get_value
+#define root_empty l2e_empty
+#define root_create_phys l2e_create_phys
+#define PGT_root_page_table PGT_l2_page_table
+
+/* misc */
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
+
+#define get_pte_flags(x) ((int)(x) & 0xFFF)
+#define put_pte_flags(x) ((intpte_t)(x))
+
+#define L1_DISALLOW_MASK (0xFFFFF180U) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U) /* PSE/GLOBAL */
+
+#endif /* __X86_32_PAGE_2L_H__ */
diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h
new file mode 100644
index 0000000000..d8cf21bfe5
--- /dev/null
+++ b/xen/include/asm-x86/x86_32/page-3level.h
@@ -0,0 +1,56 @@
+#ifndef __X86_32_PAGE_3L_H__
+#define __X86_32_PAGE_3L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 9
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L3_PAGETABLE_ENTRIES 4
+#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES
+
+#define PADDR_BITS 52
+#define PADDR_MASK ((1ULL << PADDR_BITS)-1)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
+/* read access (should only be used for debug printk's) */
+typedef u64 intpte_t;
+#define PRIpte "016llx"
+
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef struct { intpte_t l3; } l3_pgentry_t;
+typedef l3_pgentry_t root_pgentry_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l3e_get_pfn
+#define root_get_flags l3e_get_flags
+#define root_get_value l3e_get_value
+#define root_empty l3e_empty
+#define root_create_phys l3e_create_phys
+#define PGT_root_page_table PGT_l3_page_table
+
+/* misc */
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t,_s) \
+ ((3 != (((_t) & PGT_va_mask) >> PGT_va_shift)) || \
+ ((_s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))))
+#define is_guest_l3_slot(_s) (1)
+
+#define get_pte_flags(x) ((int)((x) >> 40) | ((int)(x) & 0xFFF))
+#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF))
+
+#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
+#define L3_DISALLOW_MASK (0xFFFFF1E6U) /* must-be-zero */
+
+#endif /* __X86_32_PAGE_3L_H__ */
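get_pte_flags()/put_pte_flags() are the trick that keeps flag handling 32-bit everywhere: hardware bits 0-11 stay where they are, while pte bits 52 and up (the high attribute bits, NX at bit 63 among them) fold down to software-flag bits 12 and up via the 40-bit shift. Worked through as a sketch:

    /* Mirror of the macros above, with the NX case worked out:
     * pte bit 63 <-> flag bit 23, since 63 - 40 = 23. */
    typedef unsigned long long intpte_demo_t;

    static int get_pte_flags_demo(intpte_demo_t x)
    {
        return (int)(x >> 40) | ((int)x & 0xFFF);
    }

    static intpte_demo_t put_pte_flags_demo(int x)
    {
        return (((intpte_demo_t)(x & ~0xFFF)) << 40) | (x & 0xFFF);
    }

    /* put_pte_flags_demo(1 << 23) == 1ULL << 63   (the NX bit);
     * get_pte_flags_demo(1ULL << 63) == 1 << 23   (round trip).  */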
diff --git a/xen/include/asm-x86/x86_32/page.h b/xen/include/asm-x86/x86_32/page.h
index b48971921c..988da65811 100644
--- a/xen/include/asm-x86/x86_32/page.h
+++ b/xen/include/asm-x86/x86_32/page.h
@@ -2,134 +2,23 @@
#ifndef __X86_32_PAGE_H__
#define __X86_32_PAGE_H__
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-#define PAGE_SHIFT L1_PAGETABLE_SHIFT
-#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
-
-#define PAGETABLE_ORDER 10
-#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
-
#define __PAGE_OFFSET (0xFF000000)
-#define PADDR_BITS 32
#define VADDR_BITS 32
-#define PADDR_MASK (~0UL)
#define VADDR_MASK (~0UL)
-#define _PAGE_NX 0UL
-#define PAGE_FLAG_MASK 0xfff
+#define _PAGE_NX 0U
-#ifndef __ASSEMBLY__
#include <xen/config.h>
-#include <asm/types.h>
-typedef struct { u32 l1_lo; } l1_pgentry_t;
-typedef struct { u32 l2_lo; } l2_pgentry_t;
-typedef l2_pgentry_t root_pgentry_t;
-
-/* read access (deprecated) */
-#define l1e_get_value(_x) ((unsigned long)((_x).l1_lo))
-#define l2e_get_value(_x) ((unsigned long)((_x).l2_lo))
-
-/* read access */
-#define l1e_get_pfn(_x) ((unsigned long)((_x).l1_lo >> PAGE_SHIFT))
-#define l1e_get_phys(_x) ((unsigned long)((_x).l1_lo & PAGE_MASK))
-#define l1e_get_flags(_x) ((unsigned long)((_x).l1_lo & PAGE_FLAG_MASK))
-
-#define l2e_get_pfn(_x) ((unsigned long)((_x).l2_lo >> PAGE_SHIFT))
-#define l2e_get_phys(_x) ((unsigned long)((_x).l2_lo & PAGE_MASK))
-#define l2e_get_flags(_x) ((unsigned long)((_x).l2_lo & PAGE_FLAG_MASK))
-
-/* write access */
-static inline l1_pgentry_t l1e_empty(void)
-{
- l1_pgentry_t e = { .l1_lo = 0 };
- return e;
-}
-static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags)
-{
- l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l1_pgentry_t l1e_create_phys(u32 addr, u32 flags)
-{
- l1_pgentry_t e = { .l1_lo = (addr & PAGE_MASK) | flags };
- return e;
-}
-static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags)
-{
- e->l1_lo |= flags;
-}
-static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags)
-{
- e->l1_lo &= ~flags;
-}
-
-static inline l2_pgentry_t l2e_empty(void)
-{
- l2_pgentry_t e = { .l2_lo = 0 };
- return e;
-}
-static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags)
-{
- l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l2_pgentry_t l2e_create_phys(u32 addr, u32 flags)
-{
- l2_pgentry_t e = { .l2_lo = (addr & PAGE_MASK) | flags };
- return e;
-}
-static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags)
-{
- e->l2_lo |= flags;
-}
-static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags)
-{
- e->l2_lo &= ~flags;
-}
-
-/* check entries */
-static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
-{
- return ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0;
-}
-static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
-{
- return ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0;
-}
-
-#endif /* !__ASSEMBLY__ */
-
-/* Pagetable walking. */
-#define l2e_to_l1e(_x) \
- ((l1_pgentry_t *)__va(l2e_get_phys(_x)))
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
-#define l2_table_offset(_a) \
- ((_a) >> L2_PAGETABLE_SHIFT)
+#ifdef CONFIG_X86_PAE
+# include <asm/x86_32/page-3level.h>
+#else
+# include <asm/x86_32/page-2level.h>
+#endif
/* Given a virtual address, get an entry offset into a linear page table. */
-#define l1_linear_offset(_a) ((_a) >> PAGE_SHIFT)
-
-#define is_guest_l1_slot(_s) (1)
-#define is_guest_l2_slot(_s) ((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT)
-
-#define root_get_pfn l2e_get_pfn
-#define root_get_flags l2e_get_flags
-#define root_get_value l2e_get_value
-#define root_empty l2e_empty
-#define root_create_phys l2e_create_phys
-#define PGT_root_page_table PGT_l2_page_table
-
-#define L1_DISALLOW_MASK (3UL << 7)
-#define L2_DISALLOW_MASK (7UL << 7)
-#define L3_DISALLOW_MASK (7UL << 7)
-#define L4_DISALLOW_MASK (7UL << 7)
+#define l1_linear_offset(_a) ((_a) >> L1_PAGETABLE_SHIFT)
+#define l2_linear_offset(_a) ((_a) >> L2_PAGETABLE_SHIFT)
#endif /* __X86_32_PAGE_H__ */
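The two offset families that survive into the shared header differ in exactly the way the PAE code above relies on: l2_table_offset() masks down to an index within one 512-entry table, while l2_linear_offset() keeps the upper bits and treats the four PAE L2 pages as one flat 2048-entry array, which is how idle_pg_table_l2 is declared and how x86_32/mm.c indexes it. Illustrative numbers for a PAE build:

    /* PAE shifts: L2 = 21, L3 = 30; 512 entries per L2 table. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long va = 0xF5800000UL;  /* HYPERVISOR_VIRT_START, PAE */

        printf("linear L2 slot : %lu\n", va >> 21);          /* 1964    */
        printf("within-table   : %lu\n", (va >> 21) & 511);  /* 428     */
        printf("L3 slot        : %lu\n", va >> 30);          /* table 3 */
        return 0;
    }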
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index abe56c9952..75bff5b6e9 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -24,181 +24,28 @@
#define PADDR_MASK ((1UL << PADDR_BITS)-1)
#define VADDR_MASK ((1UL << VADDR_BITS)-1)
-#define _PAGE_NX (cpu_has_nx ? (1UL<<63) : 0UL)
-#define PAGE_FLAG_MASK 0xfff
-
#ifndef __ASSEMBLY__
+
#include <xen/config.h>
#include <asm/types.h>
-typedef struct { u64 l1_lo; } l1_pgentry_t;
-typedef struct { u64 l2_lo; } l2_pgentry_t;
-typedef struct { u64 l3_lo; } l3_pgentry_t;
-typedef struct { u64 l4_lo; } l4_pgentry_t;
-typedef l4_pgentry_t root_pgentry_t;
-/* read access (depricated) */
-#define l1e_get_value(_x) ((_x).l1_lo)
-#define l2e_get_value(_x) ((_x).l2_lo)
-#define l3e_get_value(_x) ((_x).l3_lo)
-#define l4e_get_value(_x) ((_x).l4_lo)
-
-/* read access */
-#define l1e_get_pfn(_x) (((_x).l1_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
-#define l1e_get_phys(_x) (((_x).l1_lo & (PADDR_MASK&PAGE_MASK)))
-#define l1e_get_flags(_x) ((_x).l1_lo & PAGE_FLAG_MASK)
-
-#define l2e_get_pfn(_x) (((_x).l2_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
-#define l2e_get_phys(_x) (((_x).l2_lo & (PADDR_MASK&PAGE_MASK)))
-#define l2e_get_flags(_x) ((_x).l2_lo & PAGE_FLAG_MASK)
-
-#define l3e_get_pfn(_x) (((_x).l3_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
-#define l3e_get_phys(_x) (((_x).l3_lo & (PADDR_MASK&PAGE_MASK)))
-#define l3e_get_flags(_x) ((_x).l3_lo & PAGE_FLAG_MASK)
-
-#define l4e_get_pfn(_x) (((_x).l4_lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
-#define l4e_get_phys(_x) (((_x).l4_lo & (PADDR_MASK&PAGE_MASK)))
-#define l4e_get_flags(_x) ((_x).l4_lo & PAGE_FLAG_MASK)
-
-/* write access */
-static inline l1_pgentry_t l1e_empty(void)
-{
- l1_pgentry_t e = { .l1_lo = 0 };
- return e;
-}
-static inline l1_pgentry_t l1e_create_pfn(u64 pfn, u64 flags)
-{
- l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l1_pgentry_t l1e_create_phys(u64 addr, u64 flags)
-{
- l1_pgentry_t e = { .l1_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
- return e;
-}
-static inline void l1e_add_flags(l1_pgentry_t *e, u64 flags)
-{
- e->l1_lo |= flags;
-}
-static inline void l1e_remove_flags(l1_pgentry_t *e, u64 flags)
-{
- e->l1_lo &= ~flags;
-}
-
-static inline l2_pgentry_t l2e_empty(void)
-{
- l2_pgentry_t e = { .l2_lo = 0 };
- return e;
-}
-static inline l2_pgentry_t l2e_create_pfn(u64 pfn, u64 flags)
-{
- l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l2_pgentry_t l2e_create_phys(u64 addr, u64 flags)
-{
- l2_pgentry_t e = { .l2_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
- return e;
-}
-static inline void l2e_add_flags(l2_pgentry_t *e, u64 flags)
-{
- e->l2_lo |= flags;
-}
-static inline void l2e_remove_flags(l2_pgentry_t *e, u64 flags)
-{
- e->l2_lo &= ~flags;
-}
-
-static inline l3_pgentry_t l3e_empty(void)
-{
- l3_pgentry_t e = { .l3_lo = 0 };
- return e;
-}
-static inline l3_pgentry_t l3e_create_pfn(u64 pfn, u64 flags)
-{
- l3_pgentry_t e = { .l3_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l3_pgentry_t l3e_create_phys(u64 addr, u64 flags)
-{
- l3_pgentry_t e = { .l3_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
- return e;
-}
-static inline void l3e_add_flags(l3_pgentry_t *e, u64 flags)
-{
- e->l3_lo |= flags;
-}
-static inline void l3e_remove_flags(l3_pgentry_t *e, u64 flags)
-{
- e->l3_lo &= ~flags;
-}
-
-static inline l4_pgentry_t l4e_empty(void)
-{
- l4_pgentry_t e = { .l4_lo = 0 };
- return e;
-}
-static inline l4_pgentry_t l4e_create_pfn(u64 pfn, u64 flags)
-{
- l4_pgentry_t e = { .l4_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l4_pgentry_t l4e_create_phys(u64 addr, u64 flags)
-{
- l4_pgentry_t e = { .l4_lo = (addr & (PADDR_MASK&PAGE_MASK)) | flags };
- return e;
-}
-static inline void l4e_add_flags(l4_pgentry_t *e, u64 flags)
-{
- e->l4_lo |= flags;
-}
-static inline void l4e_remove_flags(l4_pgentry_t *e, u64 flags)
-{
- e->l4_lo &= ~flags;
-}
-
-/* check entries */
-static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
-{
- return ((e1->l1_lo ^ e2->l1_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
-}
-static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
-{
- return ((e1->l2_lo ^ e2->l2_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
-}
-static inline int l3e_has_changed(l3_pgentry_t *e1, l3_pgentry_t *e2, u32 flags)
-{
- return ((e1->l3_lo ^ e2->l3_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
-}
-static inline int l4e_has_changed(l4_pgentry_t *e1, l4_pgentry_t *e2, u32 flags)
-{
- return ((e1->l4_lo ^ e2->l4_lo) & ((PADDR_MASK&PAGE_MASK) | flags)) != 0;
-}
+/* read access (should only be used for debug printk's) */
+typedef u64 intpte_t;
+#define PRIpte "016lx"
-#endif /* !__ASSEMBLY__ */
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef struct { intpte_t l3; } l3_pgentry_t;
+typedef struct { intpte_t l4; } l4_pgentry_t;
+typedef l4_pgentry_t root_pgentry_t;
-/* Pagetable walking. */
-#define l2e_to_l1e(_x) \
- ((l1_pgentry_t *)__va(l2e_get_phys(_x)))
-#define l3e_to_l2e(_x) \
- ((l2_pgentry_t *)__va(l3e_get_phys(_x)))
-#define l4e_to_l3e(_x) \
- ((l3_pgentry_t *)__va(l4e_get_phys(_x)))
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
-#define l2_table_offset(_a) \
- (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#define l3_table_offset(_a) \
- (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
-#define l4_table_offset(_a) \
- (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+#endif /* !__ASSEMBLY__ */
/* Given a virtual address, get an entry offset into a linear page table. */
#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT)
#define is_guest_l1_slot(_s) (1)
-#define is_guest_l2_slot(_s) (1)
+#define is_guest_l2_slot(_t, _s) (1)
#define is_guest_l3_slot(_s) (1)
#define is_guest_l4_slot(_s) \
(((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
@@ -211,10 +58,15 @@ static inline int l4e_has_changed(l4_pgentry_t *e1, l4_pgentry_t *e2, u32 flags)
#define root_create_phys l4e_create_phys
#define PGT_root_page_table PGT_l4_page_table
-#define L1_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (3UL << 7))
-#define L2_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
-#define L3_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
-#define L4_DISALLOW_MASK ((cpu_has_nx?0:(1UL<<63)) | (7UL << 7))
+#define get_pte_flags(x) ((int)((x) >> 40) | ((int)(x) & 0xFFF))
+#define put_pte_flags(x) ((((intpte_t)((x) & ~0xFFF)) << 40) | ((x) & 0xFFF))
+
+#define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U)
+
+#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
+#define L3_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
+#define L4_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
#endif /* __X86_64_PAGE_H__ */
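With the flags compressed into 32 bits, the DISALLOW masks become plain constants over the software-flag space, and NX is carved out of the rejected set only when the CPU actually implements it. A hedged sketch of the validation they drive (in this tree the real checks live alongside get_page_from_l1e() in mm.c):

    /* Sketch: refuse a guest-supplied L1 entry whose flags touch any
     * disallowed bit.  The real Xen check also audits the frame itself. */
    static int l1e_flags_ok(l1_pgentry_t l1e)
    {
        return !(l1e_get_flags(l1e) & L1_DISALLOW_MASK);
    }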
diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h
index 6ebfd11399..84ba88e7f7 100644
--- a/xen/include/public/arch-x86_32.h
+++ b/xen/include/public/arch-x86_32.h
@@ -64,7 +64,11 @@
* Virtual addresses beyond this are not modifiable by guest OSes. The
* machine->physical mapping table starts at this address, read-only.
*/
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifdef CONFIG_X86_PAE
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#else
+# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#endif
#ifndef machine_to_phys_mapping
#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START)
#endif
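The PAE constant checks out against the config.h comment earlier in this patch: 4GB minus 0xF5800000 is 0x0A800000 bytes, i.e. 168MB of hypervisor-private space, against 64MB for the non-PAE layout. As a one-line check:

    /* 0x100000000 - 0xF5800000 = 0x0A800000 = 168 << 20 bytes. */
    ASSERT((0x100000000ULL - 0xF5800000UL) == (168ULL << 20));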