author     Jan Beulich <jbeulich@suse.com>    2013-01-23 14:15:16 +0100
committer  Jan Beulich <jbeulich@suse.com>    2013-01-23 14:15:16 +0100
commit     adc5afbf1c70ef55c260fb93e4b8ce5ccb918706
tree       2893064bbbb1e457fba52be6eadc8587a8497761
parent     53a4e820f2888b1c7fcac6cc65c5ce854a2ff1ea
x86: support up to 16Tb
This mainly involves adjusting the number of L4 entries needing copying
between page tables (which is now different between PV and HVM/idle
domains), and changing the cutoff point and method when more than the
supported amount of memory is found in a system.

Since TMEM doesn't currently cope with the full 1:1 map not always being
visible, it gets forcefully disabled in that case.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
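For context, the 16Tb figure in the title is not arbitrary: it falls out of two independent caps this patch enforces. The frame table (FRAMETABLE_SIZE, 128GB, see the config.h hunk below) can describe at most FRAMETABLE_SIZE / sizeof(struct page_info) pages, and the page-list links in struct page_info are 32-bit pdx values whose all-ones pattern is reserved as PAGE_LIST_NULL (see the mm.h hunk at the end). A minimal standalone check of the arithmetic, where the 32-byte struct page_info size is an assumption not spelled out in this diff:

    /* Hypothetical sanity check; the 32-byte page_info size is an
     * assumption, everything else mirrors constants from the diff. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t frametable_size = 128ULL << 30;  /* FRAMETABLE_SIZE: GB(128) */
        uint64_t page_info_size  = 32;            /* assumed sizeof(struct page_info) */
        uint64_t frametable_nr   = frametable_size / page_info_size;
        uint64_t page_list_null  = UINT32_MAX;    /* 32-bit pdx link, ~0 reserved */

        /* Both caps allow 2^32 pages of 4KiB each, i.e. 16TB of RAM. */
        assert(frametable_nr == 1ULL << 32);
        assert(page_list_null == (1ULL << 32) - 1);
        assert((frametable_nr << 12) == 16ULL << 40);
        return 0;
    }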
Diffstat (limited to 'xen')
 xen/arch/x86/efi/boot.c      |  2
 xen/arch/x86/mm.c            |  2
 xen/arch/x86/setup.c         | 53
 xen/arch/x86/x86_64/mm.c     | 21
 xen/common/page_alloc.c      | 19
 xen/include/asm-x86/config.h | 14
 xen/include/xen/mm.h         |  3
 7 files changed, 98 insertions(+), 16 deletions(-)
diff --git a/xen/arch/x86/efi/boot.c b/xen/arch/x86/efi/boot.c
index 725b52c3c7..4005b5e7d6 100644
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -1591,7 +1591,7 @@ void __init efi_init_memory(void)
/* Insert Xen mappings. */
for ( i = l4_table_offset(HYPERVISOR_VIRT_START);
- i < l4_table_offset(HYPERVISOR_VIRT_END); ++i )
+ i < l4_table_offset(DIRECTMAP_VIRT_END); ++i )
efi_l4_pgtable[i] = idle_pg_table[i];
#endif
}
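The EFI runtime page tables mirror the idle tables, which after this patch carry the extended 1:1 map, so the copy loop must run to DIRECTMAP_VIRT_END rather than stopping at the PV-visible HYPERVISOR_VIRT_END. Using the layout from the config.h hunk below, the copied slot range grows from 256..271 to 256..510; a rough check of the bounds, with l4_table_offset() reduced to its bit extraction (a simplification, not the real macro):

    /* L4_OFFSET mimics l4_table_offset(): bits 39..47 of the VA. */
    #define L4_OFFSET(va) (((va) >> 39) & 0x1ff)

    /* L4_OFFSET(0xffff800000000000ULL) == 256 (HYPERVISOR_VIRT_START) */
    /* L4_OFFSET(0xffff880000000000ULL) == 272 (old loop bound)        */
    /* L4_OFFSET(0xffffff8000000000ULL) == 511 (new loop bound)        */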
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index a3c4b6bcbc..aefac6d959 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1320,7 +1320,7 @@ void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
/* Xen private mappings. */
memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
&idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+ ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
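This one-constant change is the PV half of the new asymmetry: the idle/HVM tables now populate every Xen slot up to 510, so copying ROOT_PAGETABLE_XEN_SLOTS into a PV guest's L4 would leak the extended direct map into slots the PV ABI hands to the guest. The renamed constant pins the guest copy to the traditional range; the slot arithmetic, per the config.h hunk below:

    /* Slot counts implied by the config.h change further down:
     *   ROOT_PAGETABLE_XEN_SLOTS    = 512 - 256 - 1 = 255  (idle/HVM)
     *   ROOT_PAGETABLE_PV_XEN_SLOTS = 271 - 256 + 1 =  16  (PV, unchanged)
     */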
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index f9ed5804b2..e06ada1a70 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -25,6 +25,7 @@
#include <xen/dmi.h>
#include <xen/pfn.h>
#include <xen/nodemask.h>
+#include <xen/tmem_xen.h> /* for opt_tmem only */
#include <public/version.h>
#include <compat/platform.h>
#include <compat/xen.h>
@@ -381,6 +382,9 @@ static void __init setup_max_pdx(void)
if ( max_pdx > FRAMETABLE_NR )
max_pdx = FRAMETABLE_NR;
+ if ( max_pdx >= PAGE_LIST_NULL )
+ max_pdx = PAGE_LIST_NULL - 1;
+
max_page = pdx_to_pfn(max_pdx - 1) + 1;
}
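The added clamp pairs with the mm.h change at the end of this patch: a pdx equal to PAGE_LIST_NULL would be indistinguishable from the list terminator stored in struct page_info's 32-bit links. Restated on its own (hypothetical helper mirroring the hunk above; FRAMETABLE_NR and PAGE_LIST_NULL as defined in the tree):

    /* Hypothetical restatement of setup_max_pdx()'s two clamps. */
    static unsigned long clamp_max_pdx(unsigned long max_pdx)
    {
        if ( max_pdx > FRAMETABLE_NR )     /* frame table must cover it */
            max_pdx = FRAMETABLE_NR;
        if ( max_pdx >= PAGE_LIST_NULL )   /* keep ~0 free as terminator */
            max_pdx = PAGE_LIST_NULL - 1;
        return max_pdx;
    }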
@@ -1031,9 +1035,23 @@ void __init __start_xen(unsigned long mbi_p)
/* Create new mappings /before/ passing memory to the allocator. */
if ( map_e < e )
{
- map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
- (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
- init_boot_pages(map_e, e);
+ uint64_t limit = __pa(HYPERVISOR_VIRT_END - 1) + 1;
+ uint64_t end = min(e, limit);
+
+ if ( map_e < end )
+ {
+ map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
+ PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
+ init_boot_pages(map_e, end);
+ map_e = end;
+ }
+ }
+ if ( map_e < e )
+ {
+ /* This range must not be passed to the boot allocator and
+ * must also not be mapped with _PAGE_GLOBAL. */
+ map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
+ PFN_DOWN(e - map_e), __PAGE_HYPERVISOR);
}
if ( s < map_s )
{
@@ -1104,6 +1122,35 @@ void __init __start_xen(unsigned long mbi_p)
end_boot_allocator();
system_state = SYS_STATE_boot;
+ if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
+ {
+ unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
+ uint64_t mask = PAGE_SIZE - 1;
+
+ xenheap_max_mfn(limit);
+
+ /* Pass the remaining memory to the allocator. */
+ for ( i = 0; i < boot_e820.nr_map; i++ )
+ {
+ uint64_t s, e;
+
+ s = (boot_e820.map[i].addr + mask) & ~mask;
+ e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+ if ( PFN_DOWN(e) <= limit )
+ continue;
+ if ( PFN_DOWN(s) <= limit )
+ s = pfn_to_paddr(limit + 1);
+ init_domheap_pages(s, e);
+ }
+
+ if ( opt_tmem )
+ {
+ printk(XENLOG_WARNING
+ "TMEM physical RAM limit exceeded, disabling TMEM\n");
+ opt_tmem = 0;
+ }
+ }
+
vm_init();
vesa_init();
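Two details in these setup.c changes are easy to miss. First, memory above the PV-visible cutoff gets __PAGE_HYPERVISOR rather than PAGE_HYPERVISOR: the difference is the _PAGE_GLOBAL bit, and since the extended direct map only exists in HVM/idle address spaces, its TLB entries must not be marked global or they would survive into PV guest contexts where those L4 slots belong to the guest. From memory, the 2013-era flag definitions looked roughly like this (reproduced as an assumption, not part of this diff):

    /* Assumed (not from this diff): __PAGE_HYPERVISOR lacks
     * _PAGE_GLOBAL, so such TLB entries die on CR3 switches. */
    #define __PAGE_HYPERVISOR \
        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
    #define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)

Second, the E820 loop rounds each region inward (start up, end down) before clipping it against the always-mapped limit, so a region straddling the limit has only its upper part handed to init_domheap_pages(); its lower part was already given to the boot allocator earlier.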
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index bdf042de9e..aae69e0799 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1471,10 +1471,23 @@ int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm)
return -EINVAL;
}
- ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
- epfn - spfn, PAGE_HYPERVISOR);
- if ( ret )
- return ret;
+ i = virt_to_mfn(HYPERVISOR_VIRT_END - 1) + 1;
+ if ( spfn < i )
+ {
+ ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
+ min(epfn, i) - spfn, PAGE_HYPERVISOR);
+ if ( ret )
+ return ret;
+ }
+ if ( i < epfn )
+ {
+ if ( i < spfn )
+ i = spfn;
+ ret = map_pages_to_xen((unsigned long)mfn_to_virt(i), i,
+ epfn - i, __PAGE_HYPERVISOR);
+ if ( ret )
+ return ret;
+ }
old_node_start = NODE_DATA(node)->node_start_pfn;
old_node_span = NODE_DATA(node)->node_spanned_pages;
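Hot-plugged memory follows the same rule. With i set to the first MFN past the always-mapped range, the rewritten memory_add() covers three cases: a range entirely below the cutoff (one global mapping, the pre-patch behaviour), one straddling it (split at i, upper part non-global), and one entirely above (one non-global mapping). A hypothetical restatement of that split, using the same names as the hunk above:

    /* Hypothetical restatement of the mapping split in memory_add(). */
    static int map_hotplug_range(unsigned long spfn, unsigned long epfn,
                                 unsigned long cutoff /* first MFN past 1:1 */)
    {
        int ret = 0;

        if ( spfn < cutoff )          /* lower part: global mapping */
            ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
                                   min(epfn, cutoff) - spfn, PAGE_HYPERVISOR);
        if ( !ret && cutoff < epfn )  /* upper part: non-global mapping */
        {
            unsigned long s = max(spfn, cutoff);

            ret = map_pages_to_xen((unsigned long)mfn_to_virt(s), s,
                                   epfn - s, __PAGE_HYPERVISOR);
        }
        return ret;
    }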
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index e273bb7eff..9593743ef5 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -255,6 +255,9 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
unsigned long needed = (sizeof(**_heap) +
sizeof(**avail) * NR_ZONES +
PAGE_SIZE - 1) >> PAGE_SHIFT;
+#ifdef DIRECTMAP_VIRT_END
+ unsigned long eva = min(DIRECTMAP_VIRT_END, HYPERVISOR_VIRT_END);
+#endif
int i, j;
if ( !first_node_initialised )
@@ -266,14 +269,14 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
}
#ifdef DIRECTMAP_VIRT_END
else if ( *use_tail && nr >= needed &&
- (mfn + nr) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+ (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) )
{
_heap[node] = mfn_to_virt(mfn + nr - needed);
avail[node] = mfn_to_virt(mfn + nr - 1) +
PAGE_SIZE - sizeof(**avail) * NR_ZONES;
}
else if ( nr >= needed &&
- (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+ (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) )
{
_heap[node] = mfn_to_virt(mfn);
avail[node] = mfn_to_virt(mfn + needed - 1) +
@@ -1205,6 +1208,13 @@ void free_xenheap_pages(void *v, unsigned int order)
#else
+static unsigned int __read_mostly xenheap_bits;
+
+void __init xenheap_max_mfn(unsigned long mfn)
+{
+ xenheap_bits = fls(mfn) + PAGE_SHIFT - 1;
+}
+
void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
init_domheap_pages(ps, pe);
@@ -1217,6 +1227,11 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
ASSERT(!in_irq());
+ if ( xenheap_bits && (memflags >> _MEMF_bits) > xenheap_bits )
+ memflags &= ~MEMF_bits(~0);
+ if ( !(memflags >> _MEMF_bits) )
+ memflags |= MEMF_bits(xenheap_bits);
+
pg = alloc_domheap_pages(NULL, order, memflags);
if ( unlikely(pg == NULL) )
return NULL;
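The xenheap_bits machinery exists because, in this #else branch, xenheap and domheap share one allocator (init_xenheap_pages() simply calls init_domheap_pages()), so past the cutoff a xenheap allocation could land in memory with no permanent mapping. xenheap_max_mfn() converts the last always-mapped MFN into an address-width cap that alloc_xenheap_pages() then forces through MEMF_bits(). A toy computation of that cap (self-contained; fls() reimplemented here since the real one is Xen-internal):

    #include <stdio.h>

    /* Local stand-in for Xen's fls(): 1-based index of the top set bit. */
    static unsigned int fls_ul(unsigned long x)
    {
        return x ? 64 - __builtin_clzl(x) : 0;
    }

    int main(void)
    {
        unsigned long limit_mfn = 0x27fffff;   /* made-up: ~160GB always mapped */
        unsigned int xenheap_bits = fls_ul(limit_mfn) + 12 /* PAGE_SHIFT */ - 1;

        /* Top set bit of 0x27fffff is bit 25, so fls() == 26 and the cap
         * is 2^37 = 128GB: the largest power of two inside the mapped range. */
        printf("xenheap allocations capped below 2^%u bytes\n", xenheap_bits);
        return 0;
    }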
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 3a4223952d..652f6b79a3 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -163,8 +163,12 @@ extern unsigned char boot_edid_info[128];
* Page-frame information array.
* 0xffff830000000000 - 0xffff87ffffffffff [5TB, 5*2^40 bytes, PML4:262-271]
* 1:1 direct mapping of all physical memory.
- * 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
- * Guest-defined use.
+ * 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
+ * PV: Guest-defined use.
+ * 0xffff880000000000 - 0xffffff7fffffffff [119.5TB, PML4:272-510]
+ * HVM/idle: continuation of 1:1 mapping
+ * 0xffffff8000000000 - 0xffffffffffffffff [512GB, 2^39 bytes PML4:511]
+ * HVM/idle: unused
*
* Compatibility guest area layout:
* 0x0000000000000000 - 0x00000000f57fffff [3928MB, PML4:0]
@@ -183,6 +187,8 @@ extern unsigned char boot_edid_info[128];
#define ROOT_PAGETABLE_FIRST_XEN_SLOT 256
#define ROOT_PAGETABLE_LAST_XEN_SLOT 271
#define ROOT_PAGETABLE_XEN_SLOTS \
+ (L4_PAGETABLE_ENTRIES - ROOT_PAGETABLE_FIRST_XEN_SLOT - 1)
+#define ROOT_PAGETABLE_PV_XEN_SLOTS \
(ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
/* Hypervisor reserves PML4 slots 256 to 271 inclusive. */
@@ -241,9 +247,9 @@ extern unsigned char boot_edid_info[128];
#define FRAMETABLE_SIZE GB(128)
#define FRAMETABLE_NR (FRAMETABLE_SIZE / sizeof(*frame_table))
#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
-/* Slot 262-271: A direct 1:1 mapping of all of physical memory. */
+/* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
-#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES*10)
+#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
#ifndef __ASSEMBLY__
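The numbers are worth checking: the direct map grows from 10 L4 slots (5TB) to 249 (slots 262-510). A quick standalone check that the new virtual span comfortably exceeds what the frame table can describe anyway:

    #include <assert.h>

    int main(void)
    {
        unsigned long long pml4_entry_bytes = 1ULL << 39; /* 512GB per L4 slot */
        unsigned long long directmap_size = pml4_entry_bytes * (511 - 262);

        assert(directmap_size == 249ULL << 39);           /* 124.5TB of VA */
        assert(directmap_size > (16ULL << 40));           /* >> the 16TB cap */
        return 0;
    }

So after this patch the binding limit is the frame table (and the 32-bit pdx links), not direct-map virtual address space.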
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 00b191527f..2f701f5d08 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -43,6 +43,7 @@ void end_boot_allocator(void);
/* Xen suballocator. These functions are interrupt-safe. */
void init_xenheap_pages(paddr_t ps, paddr_t pe);
+void xenheap_max_mfn(unsigned long mfn);
void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
void free_xenheap_pages(void *v, unsigned int order);
#define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
@@ -111,7 +112,7 @@ struct page_list_head
/* These must only have instances in struct page_info. */
# define page_list_entry
-#define PAGE_LIST_NULL (~0)
+# define PAGE_LIST_NULL ((typeof(((struct page_info){}).list.next))~0)
# if !defined(pdx_to_page) && !defined(page_to_pdx)
# if defined(__page_to_mfn) || defined(__mfn_to_page)
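The PAGE_LIST_NULL change is subtle: page_list_entry links are 32-bit pdx values, and a bare (~0) is a signed int that sign-extends when compared against an unsigned long, as in the new setup_max_pdx() clamp above. Taking typeof() of the actual link field pins the constant to 0xffffffff. A minimal demonstration (GCC-style typeof; simplified struct, not the real page_info):

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for struct page_info's 32-bit list links. */
    struct page_info { struct { uint32_t next, prev; } list; };

    #define PAGE_LIST_NULL ((typeof(((struct page_info){}).list.next))~0)

    int main(void)
    {
        unsigned long long max_pdx = 1ULL << 32;   /* made-up oversized pdx count */

        /* With a plain (~0) this would compare against -1 sign-extended
         * to 64 bits and the clamp would never fire; typed, it is
         * 0xffffffff and the comparison behaves as intended. */
        printf("PAGE_LIST_NULL = %#x\n", PAGE_LIST_NULL);
        printf("clamp fires: %s\n", max_pdx >= PAGE_LIST_NULL ? "yes" : "no");
        return 0;
    }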