aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>2005-07-11 16:00:46 +0000
committerkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>2005-07-11 16:00:46 +0000
commit0ea8f12a67e038a3c8a9275080dcd3325c82f8e8 (patch)
treeec2f6ff42ba7571eba63c0df5433d95f09c2b6c0
parent220d08e7f14bfdbaa97c4173e4aa15ebd4d54e35 (diff)
downloadxen-0ea8f12a67e038a3c8a9275080dcd3325c82f8e8.tar.gz
xen-0ea8f12a67e038a3c8a9275080dcd3325c82f8e8.tar.bz2
xen-0ea8f12a67e038a3c8a9275080dcd3325c82f8e8.zip
Non-PAE behavior should be identical after applying these
patches, i.e. both dom0 and domU boot as usual. In PAE mode, dom0 boot works and seems to be stable; it is running Linux kernel builds with -j12 at the moment ;) Actually using more than 4GB has not been tested yet, as my machine has only 1GB. This also needs a patch for the e820 code in Xen: right now Xen will not even try to use memory above 4GB. Signed-off-by: Gerd Knorr <kraxel@suse.de>
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/Kconfig10
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S12
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c23
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/init.c63
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c1
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h60
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h4
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/hypervisor.h16
8 files changed, 149 insertions, 40 deletions
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
index f0cd7eac8f..066828d224 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
@@ -581,11 +581,11 @@ config HIGHMEM4G
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
-#config HIGHMEM64G
-# bool "64GB"
-# help
-# Select this if you have a 32-bit processor and more than 4
-# gigabytes of physical RAM.
+config HIGHMEM64G
+ bool "64GB"
+ help
+ Select this if you have a 32-bit processor and more than 4
+ gigabytes of physical RAM.
endchoice
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
index a0ccdbc2d9..b440104ff3 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
@@ -5,6 +5,11 @@
.ascii "GUEST_OS=linux,GUEST_VER=2.6"
.ascii ",XEN_VER=3.0"
.ascii ",VIRT_BASE=0xC0000000"
+#ifdef CONFIG_X86_PAE
+ .ascii ",PAE=yes"
+#else
+ .ascii ",PAE=no"
+#endif
.ascii ",LOADER=generic"
.byte 0
@@ -145,10 +150,17 @@ ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* 0x53 reserved */
.quad 0x0000000000000000 /* 0x5b reserved */
+#ifdef CONFIG_X86_PAE
+ .quad 0x00cfbb00000067ff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cfb300000067ff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x00cffb00000067ff /* 0x73 user 4GB code at 0x00000000 */
+ .quad 0x00cff300000067ff /* 0x7b user 4GB data at 0x00000000 */
+#else
.quad 0x00cfbb000000c3ff /* 0x60 kernel 4GB code at 0x00000000 */
.quad 0x00cfb3000000c3ff /* 0x68 kernel 4GB data at 0x00000000 */
.quad 0x00cffb000000c3ff /* 0x73 user 4GB code at 0x00000000 */
.quad 0x00cff3000000c3ff /* 0x7b user 4GB data at 0x00000000 */
+#endif
.quad 0x0000000000000000 /* 0x80 TSS descriptor */
.quad 0x0000000000000000 /* 0x88 LDT descriptor */
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
index cf0488175f..ed71010e14 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
@@ -47,15 +47,20 @@
#elif defined(CONFIG_X86_64)
#define pmd_val_ma(v) (v).pmd
#else
-#define pmd_val_ma(v) (v).pud.pgd.pgd
+#ifdef CONFIG_X86_PAE
+# define pmd_val_ma(v) ((v).pmd)
+# define pud_val_ma(v) ((v).pgd.pgd)
+#else
+# define pmd_val_ma(v) ((v).pud.pgd.pgd)
+#endif
#endif
#ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val)
+void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
mmu_update_t u;
u.ptr = virt_to_machine(ptr);
- u.val = val;
+ u.val = pte_val_ma(val);
BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
@@ -67,6 +72,16 @@ void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
+#ifdef CONFIG_X86_PAE
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
+{
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pud_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+#endif
+
#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
@@ -171,6 +186,8 @@ void xen_pgd_pin(unsigned long ptr)
struct mmuext_op op;
#ifdef CONFIG_X86_64
op.cmd = MMUEXT_PIN_L4_TABLE;
+#elif defined(CONFIG_X86_PAE)
+ op.cmd = MMUEXT_PIN_L3_TABLE;
#else
op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
index 7c8b95a8f2..b8dfade5ec 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
@@ -57,9 +57,10 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
pud_t *pud;
pmd_t *pmd_table;
-
+
#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ make_page_readonly(pmd_table);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
if (pmd_table != pmd_offset(pud, 0))
@@ -115,13 +116,13 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
pmd_idx = pmd_index(vaddr);
pgd = pgd_base + pgd_idx;
- for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, pgd_idx++) {
+ for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
if (pgd_none(*pgd))
one_md_table_init(pgd);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (pmd_none(*pmd))
+ if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
one_page_table_init(pmd);
vaddr += PMD_SIZE;
@@ -160,13 +161,26 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
pmd_idx = pmd_index(PAGE_OFFSET);
pte_ofs = pte_index(PAGE_OFFSET);
- for (; pgd_idx < PTRS_PER_PGD_NO_HV; pgd++, pgd_idx++) {
+ for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+#ifdef CONFIG_XEN
+ /*
+ * Native linux hasn't PAE-paging enabled yet at this
+ * point. When running as xen domain we are in PAE
+ * mode already, thus we can't simply hook an empty
+ * pmd. That would kill the mappings we are currently
+ * using ...
+ */
+ pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
+#else
pmd = one_md_table_init(pgd);
+#endif
if (pfn >= max_low_pfn)
continue;
pmd += pmd_idx;
for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+ if (address >= HYPERVISOR_VIRT_START)
+ continue;
/* Map with big pages if possible, otherwise create normal page tables. */
if (cpu_has_pse) {
@@ -350,6 +364,7 @@ static void __init pagetable_init (void)
* page directory, write-protect the new page directory, then switch to
* it. We clean up by write-enabling and then freeing the old page dir.
*/
+#ifndef CONFIG_X86_PAE
memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
make_page_readonly(pgd_base);
xen_pgd_pin(__pa(pgd_base));
@@ -358,8 +373,31 @@ static void __init pagetable_init (void)
make_page_writable(old_pgd);
__flush_tlb_all();
free_bootmem(__pa(old_pgd), PAGE_SIZE);
- init_mm.context.pinned = 1;
+#else
+ {
+ pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
+ pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
+ pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
+
+ memcpy(new_pmd, old_pmd, PAGE_SIZE);
+ memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+ set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
+
+ make_page_readonly(new_pmd);
+ make_page_readonly(pgd_base);
+ xen_pgd_pin(__pa(pgd_base));
+ load_cr3(pgd_base);
+ xen_pgd_unpin(__pa(old_pgd));
+ make_page_writable(old_pgd);
+ make_page_writable(old_pmd);
+ __flush_tlb_all();
+
+ free_bootmem(__pa(old_pgd), PAGE_SIZE);
+ free_bootmem(__pa(old_pmd), PAGE_SIZE);
+ }
+#endif
+ init_mm.context.pinned = 1;
kernel_physical_mapping_init(pgd_base);
remap_numa_kva();
@@ -372,7 +410,7 @@ static void __init pagetable_init (void)
permanent_kmaps_init(pgd_base);
-#ifdef CONFIG_X86_PAE
+#if 0 /* def CONFIG_X86_PAE */
/*
* Add low memory identity-mappings - SMP needs it when
* starting up on an AP from real-mode. In the non-PAE
@@ -380,7 +418,7 @@ static void __init pagetable_init (void)
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+ set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
#endif
}
@@ -415,7 +453,7 @@ void zap_low_mappings (void)
* us, because pgd_clear() is a no-op on i386.
*/
for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
set_pgd(swapper_pg_dir+i, __pgd(0));
@@ -514,10 +552,12 @@ void __init paging_init(void)
pagetable_init();
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
/*
* We will bail out later - printk doesn't work right now so
* the user would just see a hanging kernel.
+ * when running as xen domain we are already in PAE mode at
+ * this point.
*/
if (cpu_has_pae)
set_in_cr4(X86_CR4_PAE);
@@ -690,8 +730,13 @@ void __init pgtable_cache_init(void)
panic("pgtable_cache_init(): cannot create pmd cache");
}
pgd_cache = kmem_cache_create("pgd",
+#if 0 /* How the heck _this_ works in native linux ??? */
PTRS_PER_PGD*sizeof(pgd_t),
PTRS_PER_PGD*sizeof(pgd_t),
+#else
+ PAGE_SIZE,
+ PAGE_SIZE,
+#endif
0,
pgd_ctor,
pgd_dtor);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
index cd03f91a97..ffe3ca561a 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
@@ -364,6 +364,7 @@ void pgd_free(pgd_t *pgd)
if (!HAVE_SHARED_KERNEL_PMD) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
make_page_writable(pmd);
+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
}
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
index af8ccd4652..f696989209 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
@@ -15,6 +15,8 @@
#include <linux/config.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/bug.h>
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/foreign_page.h>
@@ -84,9 +86,40 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t;
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t;
typedef struct { unsigned long long pgprot; } pgprot_t;
-#define pmd_val(x) ((x).pmd)
-#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
-#define __pmd(x) ((pmd_t) { (x) } )
+#define __pte(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+#define __pmd(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
+static inline unsigned long long pte_val(pte_t x)
+{
+ unsigned long long ret;
+
+ if (x.pte_low) {
+ ret = x.pte_low | (unsigned long long)x.pte_high << 32;
+ ret = machine_to_phys(ret) | 1;
+ } else {
+ ret = 0;
+ }
+ return ret;
+}
+static inline unsigned long long pmd_val(pmd_t x)
+{
+ unsigned long long ret = x.pmd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
+static inline unsigned long long pgd_val(pgd_t x)
+{
+ unsigned long long ret = x.pgd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
+static inline unsigned long long pte_val_ma(pte_t x)
+{
+ return (unsigned long long)x.pte_high << 32 | x.pte_low;
+}
#define HPAGE_SHIFT 21
#else
typedef struct { unsigned long pte_low; } pte_t;
@@ -96,6 +129,16 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define pte_val(x) (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
(x).pte_low)
#define pte_val_ma(x) ((x).pte_low)
+#define __pte(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ unsigned long ret = x.pgd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
#define HPAGE_SHIFT 22
#endif
#define PTE_MASK PAGE_MASK
@@ -107,20 +150,9 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
-
-static inline unsigned long pgd_val(pgd_t x)
-{
- unsigned long ret = x.pgd;
- if (ret) ret = machine_to_phys(ret);
- return ret;
-}
#define pgprot_val(x) ((x).pgprot)
-#define __pte(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
#define __pte_ma(x) ((pte_t) { (x) } )
-#define __pgd(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
#define __pgprot(x) ((pgprot_t) { (x) } )
#endif /* !__ASSEMBLY__ */
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
index 4890d7a479..ef48c20ab1 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
@@ -400,7 +400,7 @@ extern void noexec_setup(const char *str);
if ( likely((__vma)->vm_mm == current->mm) ) { \
HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
} else { \
- xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ xen_l1_entry_update((__ptep), (__entry)); \
flush_tlb_page((__vma), (__address)); \
} \
} \
@@ -419,7 +419,7 @@ do { \
HYPERVISOR_update_va_mapping((__address), \
__entry, 0); \
} else { \
- xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ xen_l1_entry_update((__ptep), (__entry)); \
} \
} while (0)
diff --git a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
index 0ee9e3d12e..1fd5aaf694 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
@@ -40,9 +40,13 @@
#include <asm/ptrace.h>
#include <asm/page.h>
#if defined(__i386__)
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <asm-generic/pgtable-nopmd.h>
-#endif
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+# ifdef CONFIG_X86_PAE
+# include <asm-generic/pgtable-nopud.h>
+# else
+# include <asm-generic/pgtable-nopmd.h>
+# endif
+# endif
#endif
/* arch/xen/i386/kernel/setup.c */
@@ -80,11 +84,9 @@ void xen_tlb_flush(void);
void xen_invlpg(unsigned long ptr);
#ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val);
+void xen_l1_entry_update(pte_t *ptr, pte_t val);
void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
-#ifdef __x86_64__
-void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64 only */
-#endif
+void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
void xen_pgd_pin(unsigned long ptr);
void xen_pgd_unpin(unsigned long ptr);