author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-08-06 18:45:12 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-08-06 18:45:12 +0000
commit     bea911cf0dc0f7c5b3da2f27c9c6bd3f7c16fcd9 (patch)
tree       2cfe5d724b1b5daa2efa9566e3aee6d8337b81e3
parent     1821da0c9b12d28d857abe29713b78f4bdfa30ef (diff)
download   xen-bea911cf0dc0f7c5b3da2f27c9c6bd3f7c16fcd9.tar.gz
           xen-bea911cf0dc0f7c5b3da2f27c9c6bd3f7c16fcd9.tar.bz2
           xen-bea911cf0dc0f7c5b3da2f27c9c6bd3f7c16fcd9.zip
bitkeeper revision 1.1159.4.1 (4113d1b8oHof-8weGeQ3gNQFteLGEg)
David Becker writes:

This patch adds support for high memory to /proc/xen/memory_target, and it restores the mem= linux boot parameter. This is for 2.4.26 only; I haven't looked at 2.6 yet. (Remember that CONFIG_HIGHMEM is not on by default in the xen0 and xenU configs.) I have racks of 2g and 4g machines, hence my interest.

The 'mem=' param tells linux the maximum amount of memory it can use. The feature originated for hosts where the normal memory detection mechanisms failed. These days its main use in stock linux is to artificially constrain how much memory linux uses, for testing low-memory systems. With this patch, mem= now means the maximum memory linux could ever use.

When a domain is created with less than the mem= value, linux behaves as though the 'missing' memory were already 'allocated'. To give the domain more memory, first run setdomainmaxmem in Dom-0 to raise Xen's limit, then write the new total to /proc/xen/memory_target in the domain. When mem= is not explicitly set, it defaults to the boot-time size of the domain.

dom-0# xm create name=dom-1 memory=100 extra='-b mem=2g'
dom-0# setdomainmaxmem 1 2g
dom-1# echo 2g > /proc/xen/memory_target
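The /proc/xen/memory_target file can also be read back: balloon_read() in the patch below reports the domain's current size in bytes. So after the steps above, a quick sanity check (standard tools, nothing added by this patch) would look like:

dom-1# cat /proc/xen/memory_target
2147483648
dom-1# free -m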
-rw-r--r--  .rootkeys                                                   |   1
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c  | 312
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c             |  47
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/mm/init.c                  |  24
-rw-r--r--  tools/misc/setdomainmaxmem                                  |  34
5 files changed, 331 insertions, 87 deletions
diff --git a/.rootkeys b/.rootkeys
index 41716c6936..8cdac7d9ab 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -298,6 +298,7 @@
4022a73cEKvrYe_DVZW2JlAxobg9wg tools/misc/nsplitd/Makefile
4022a73cKms4Oq030x2JBzUB426lAQ tools/misc/nsplitd/nsplitd.c
3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h
+4113d1afyPjO8m8-9E1pVBDHzGe1jQ tools/misc/setdomainmaxmem
3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone
3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
index dba26c9c80..f5c447e9cf 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
@@ -17,6 +17,8 @@
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <asm/hypervisor.h>
@@ -39,7 +41,7 @@ typedef struct user_balloon_op {
static struct proc_dir_entry *balloon_pde;
unsigned long credit;
-static unsigned long current_pages, max_pages;
+static unsigned long current_pages, most_seen_pages;
static inline pte_t *get_ptep(unsigned long addr)
{
@@ -69,41 +71,43 @@ static unsigned long inflate_balloon(unsigned long num_pages)
parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
if ( parray == NULL )
{
- printk("inflate_balloon: Unable to vmalloc parray\n");
- return 0;
+ printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
+ return -EFAULT;
}
currp = parray;
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0; i < num_pages; i++, currp++ )
{
- /* NB. Should be GFP_ATOMIC for a less aggressive inflation. */
- vaddr = __get_free_page(GFP_KERNEL);
+ struct page *page = alloc_page(GFP_HIGHUSER);
+ unsigned long pfn = page - mem_map;
/* If allocation fails then free all reserved pages. */
- if ( vaddr == 0 )
+ if ( page == 0 )
{
- printk("Unable to inflate balloon by %ld, only %ld pages free.",
+ printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.",
num_pages, i);
currp = parray;
- for(j = 0; j < i; j++){
- free_page(*currp++);
+ for(j = 0; j < i; j++, ++currp){
+ __free_page((struct page *) (mem_map + *currp));
}
+ ret = -EFAULT;
goto cleanup;
}
- *currp++ = vaddr;
+ *currp = pfn;
}
- currp = parray;
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
{
- curraddr = *currp;
- *currp = virt_to_machine(*currp) >> PAGE_SHIFT;
- queue_l1_entry_update(get_ptep(curraddr), 0);
- phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD;
- currp++;
+ unsigned long mfn = phys_to_machine_mapping[*currp];
+ curraddr = page_address(mem_map + *currp);
+ if (curraddr)
+ queue_l1_entry_update(get_ptep(curraddr), 0);
+
+ phys_to_machine_mapping[*currp] = DEAD;
+ *currp = mfn;
}
XEN_flush_page_update_queue();
@@ -112,7 +116,7 @@ static unsigned long inflate_balloon(unsigned long num_pages)
parray, num_pages, 0);
if ( unlikely(ret != num_pages) )
{
- printk("Unable to inflate balloon, error %lx\n", ret);
+ printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
goto cleanup;
}
@@ -130,7 +134,7 @@ static unsigned long inflate_balloon(unsigned long num_pages)
* phys->machine mapping table looking for DEAD entries and populates
* them.
*/
-static unsigned long process_new_pages(unsigned long * parray,
+static unsigned long process_returned_pages(unsigned long * parray,
unsigned long num)
{
/* currently, this function is rather simplistic as
@@ -140,7 +144,7 @@ static unsigned long process_new_pages(unsigned long * parray,
* incorporated here.
*/
- unsigned long tot_pages = start_info.nr_pages;
+ unsigned long tot_pages = most_seen_pages;
unsigned long * curr = parray;
unsigned long num_installed;
unsigned long i;
@@ -152,29 +156,18 @@ static unsigned long process_new_pages(unsigned long * parray,
{
phys_to_machine_mapping[i] = *curr;
queue_machphys_update(*curr, i);
- queue_l1_entry_update(
+ if (i<max_low_pfn)
+ queue_l1_entry_update(
get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
- *curr = (unsigned long)__va(i << PAGE_SHIFT);
+ __free_page(mem_map + i);
+
curr++;
num_installed++;
}
}
- /*
- * This is tricky (and will also change for machine addrs that
- * are mapped to not previously released addresses). We free pages
- * that were allocated by get_free_page (the mappings are different
- * now, of course).
- */
- curr = parray;
- for ( i = 0; i < num_installed; i++ )
- {
- free_page(*curr);
- curr++;
- }
-
return num_installed;
}
@@ -185,14 +178,15 @@ unsigned long deflate_balloon(unsigned long num_pages)
if ( num_pages > credit )
{
- printk("Can not allocate more pages than previously released.\n");
+ printk(KERN_ERR "deflate_balloon: %d pages > %d credit.\n",
+ num_pages, credit);
return -EAGAIN;
}
parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
if ( parray == NULL )
{
- printk("inflate_balloon: Unable to vmalloc parray\n");
+ printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
return 0;
}
@@ -202,14 +196,16 @@ unsigned long deflate_balloon(unsigned long num_pages)
parray, num_pages, 0);
if ( unlikely(ret != num_pages) )
{
- printk("Unable to deflate balloon, error %lx\n", ret);
+ printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
+ ret);
goto cleanup;
}
- if ( (ret = process_new_pages(parray, num_pages)) < num_pages )
+ if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
{
- printk("Unable to deflate balloon by specified %lx pages, only %lx.\n",
- num_pages, ret);
+ printk(KERN_WARNING
+ "deflate_balloon: restored only %lx of %lx pages.\n",
+ ret, num_pages);
goto cleanup;
}
@@ -224,20 +220,170 @@ unsigned long deflate_balloon(unsigned long num_pages)
#define PAGE_TO_MB_SHIFT 8
+/*
+ * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c
+ * The loops do go through all of low memory (ZONE_NORMAL). The
+ * old pages have _PAGE_PRESENT set and so get skipped.
+ * If low memory is not full, the new pages are used to fill it, going
+ * from cur_low_pfn to low_pfn. High memory is not direct mapped, so
+ * no extension is needed for new high memory.
+ */
+
+static void pagetable_extend (int cur_low_pfn, int newpages)
+{
+ unsigned long vaddr, end;
+ pgd_t *kpgd, *pgd, *pgd_base;
+ int i, j, k;
+ pmd_t *kpmd, *pmd;
+ pte_t *kpte, *pte, *pte_base;
+ int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
+
+ /*
+ * This can be zero as well - no problem, in that case we exit
+ * the loops anyway due to the PTRS_PER_* conditions.
+ */
+ end = (unsigned long)__va(low_pfn*PAGE_SIZE);
+
+ pgd_base = init_mm.pgd;
+ i = __pgd_offset(PAGE_OFFSET);
+ pgd = pgd_base + i;
+
+ for (; i < PTRS_PER_PGD; pgd++, i++) {
+ vaddr = i*PGDIR_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ pmd = (pmd_t *)pgd;
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+ if (end && (vaddr >= end))
+ break;
+
+ /* Filled in for us already? */
+ if ( pmd_val(*pmd) & _PAGE_PRESENT )
+ continue;
+
+ pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
+
+ for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
+ }
+ kpgd = pgd_offset_k((unsigned long)pte_base);
+ kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
+ kpte = pte_offset(kpmd, (unsigned long)pte_base);
+ queue_l1_entry_update(kpte,
+ (*(unsigned long *)kpte)&~_PAGE_RW);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+ XEN_flush_page_update_queue();
+ }
+ }
+}
+
+/*
+ * claim_new_pages() asks xen to increase this domain's memory reservation
+ * and return a list of the new pages of memory. These new pages are
+ * added to the free list of the memory manager.
+ *
+ * Available RAM does not normally change while Linux runs. To make this work,
+ * the linux mem= boottime command line param must say how big memory could
+ * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c
+ * sets max_pfn, max_low_pfn and the zones according to
+ * this max memory size. The page tables themselves can only be
+ * extended after xen has assigned new pages to this domain.
+ */
+
+static unsigned long
+claim_new_pages(unsigned long num_pages)
+{
+ unsigned long new_page_cnt, pfn;
+ unsigned long * parray, *curr;
+
+ if (most_seen_pages+num_pages> max_pfn)
+ num_pages = max_pfn-most_seen_pages;
+ if (num_pages==0) return 0;
+
+ parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
+ if ( parray == NULL )
+ {
+ printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
+ return 0;
+ }
+
+ XEN_flush_page_update_queue();
+ new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
+ parray, num_pages, 0);
+ if (new_page_cnt != num_pages)
+ {
+ printk(KERN_WARNING
+ "claim_new_pages: xen granted only %lu of %lu requested pages\n",
+ new_page_cnt, num_pages);
+
+ /* XXX
+ * avoid xen lockup when user forgot to setdomainmaxmem. xen
+ * usually can dribble out a few pages and then hangs
+ */
+ if (new_page_cnt < 1000) {
+ printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
+ HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
+ parray, new_page_cnt, 0);
+ return -EFAULT;
+ }
+ }
+ memcpy(phys_to_machine_mapping+most_seen_pages, parray,
+ new_page_cnt * sizeof(unsigned long));
+
+ pagetable_extend(most_seen_pages,new_page_cnt);
+
+ for (pfn = most_seen_pages, curr = parray;
+ pfn < most_seen_pages+new_page_cnt;
+ pfn++, curr++ )
+ {
+ struct page *page = mem_map + pfn;
+
+#ifndef CONFIG_HIGHMEM
+ if (pfn>=max_low_pfn) {
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+ pfn>>PAGE_TO_MB_SHIFT);
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+ break;
+ }
+#endif
+ queue_machphys_update(*curr, pfn);
+ XEN_flush_page_update_queue();
+ if (pfn<max_low_pfn) {
+ queue_l1_entry_update(get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
+ ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
+ XEN_flush_page_update_queue();
+ }
+
+ /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
+ ClearPageReserved(page);
+ if (pfn>=max_low_pfn) set_bit(PG_highmem, &page->flags);
+ set_page_count(page, 1);
+ __free_page(page);
+ }
+
+ vfree(parray);
+
+ return new_page_cnt;
+}
+
static int balloon_write(struct file *file, const char *buffer,
u_long count, void *data)
{
char memstring[64], *endchar;
int len, i;
- unsigned long pages;
- unsigned long long target;
+ unsigned long target;
+ unsigned long long targetbytes;
/* Only admin can play with the balloon :) */
if ( !capable(CAP_SYS_ADMIN) )
return -EPERM;
if (count>sizeof memstring) {
- return -EFBIG;
+ return -EFBIG;
}
len = strnlen_user(buffer, count);
@@ -248,53 +394,66 @@ static int balloon_write(struct file *file, const char *buffer,
endchar = memstring;
for(i=0; i<len; ++i,++endchar) {
- if ('0'>memstring[i] || memstring[i]>'9') break;
+ if ('0'>memstring[i] || memstring[i]>'9') break;
}
if (i==0) return -EBADMSG;
- target = memparse(memstring,&endchar);
- pages = target >> PAGE_SHIFT;
+ targetbytes = memparse(memstring,&endchar);
+ target = targetbytes >> PAGE_SHIFT;
- if (pages < current_pages) {
- int change = inflate_balloon(current_pages-pages);
- if (change<0) return change;
+ if (target < current_pages) {
+ int change = inflate_balloon(current_pages-target);
+ if (change<=0) return change;
- current_pages -= change;
- printk("Relinquish %dMB to xen. Domain now has %ldMB\n",
- change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
+ current_pages -= change;
+ printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
+ change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
}
- else if (pages > current_pages) {
- int change = deflate_balloon(min(pages,max_pages) - current_pages);
- if (change<0) return change;
+ else if (target > current_pages) {
+ int change, reclaim = min(target,most_seen_pages) - current_pages;
+
+ if (reclaim) {
+ change = deflate_balloon( reclaim);
+ if (change<=0) return change;
+ current_pages += change;
+ printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
+ change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
+ }
- current_pages += change;
- printk("Reclaim %dMB from xen. Domain now has %ldMB\n",
- change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
+ if (most_seen_pages<target) {
+ int growth = claim_new_pages(target-most_seen_pages);
+ if (growth<=0) return growth;
+ most_seen_pages += growth;
+ current_pages += growth;
+ printk(KERN_INFO "Granted %dMB new mem by xen. Domain now has %luMB\n",
+ growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
+ }
}
+
return len;
}
static int balloon_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+ int count, int *eof, void *data)
{
- int len;
- len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
-
- if (len <= off+count) *eof = 1;
- *start = page + off;
- len -= off;
- if (len>count) len = count;
- if (len<0) len = 0;
- return len;
+ int len;
+ len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
+
+ if (len <= off+count) *eof = 1;
+ *start = page + off;
+ len -= off;
+ if (len>count) len = count;
+ if (len<0) len = 0;
+ return len;
}
static int __init init_module(void)
{
printk(KERN_ALERT "Starting Xen Balloon driver\n");
- max_pages = current_pages = start_info.nr_pages;
+ most_seen_pages = current_pages = min(start_info.nr_pages,max_pfn);
if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
{
printk(KERN_ALERT "Unable to create balloon driver proc entry!");
@@ -304,6 +463,17 @@ static int __init init_module(void)
balloon_pde->write_proc = balloon_write;
balloon_pde->read_proc = balloon_read;
+ /*
+ * make a new phys map if mem= says xen can give us memory to grow
+ */
+ if (max_pfn > start_info.nr_pages) {
+ extern unsigned long *phys_to_machine_mapping;
+ unsigned long *newmap;
+ newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
+ phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping,
+ start_info.nr_pages * sizeof(unsigned long));
+ }
+
return 0;
}
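Worth noting: balloon_write() above also handles the shrink direction, so the same proc file can hand memory back to Xen. A hypothetical session against the 100MB domain from the commit message (sizes illustrative):

dom-1# echo 64m > /proc/xen/memory_target

with the kernel logging something like:

Relinquish 36MB to xen. Domain now has 64MB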
diff --git a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c
index 039fdaf162..87fbefd1c9 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c
+++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c
@@ -120,10 +120,15 @@ union start_info_union start_info_union;
static char command_line[COMMAND_LINE_SIZE];
char saved_command_line[COMMAND_LINE_SIZE];
-static void __init parse_mem_cmdline (char ** cmdline_p)
+/* parse_mem_cmdline()
+ * returns the value of the mem= boot param converted to pages or 0
+ */
+static int __init parse_mem_cmdline (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = saved_command_line;
int len = 0;
+ unsigned long long bytes;
+ int mem_param = 0;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE);
@@ -145,8 +150,9 @@ static void __init parse_mem_cmdline (char ** cmdline_p)
} else if (!memcmp(from+4, "exactmap", 8)) {
from += 8+4;
} else {
- (void)memparse(from+4, &from);
- if (*from == '@')
+ bytes = memparse(from+4, &from);
+ mem_param = bytes>>PAGE_SHIFT;
+ if (*from == '@')
(void)memparse(from+1, &from);
}
}
@@ -160,6 +166,8 @@ static void __init parse_mem_cmdline (char ** cmdline_p)
}
*to = '\0';
*cmdline_p = command_line;
+
+ return mem_param;
}
/*
@@ -194,7 +202,9 @@ int xen_module_init(struct module *mod)
void __init setup_arch(char **cmdline_p)
{
- unsigned long bootmap_size, start_pfn, max_low_pfn;
+ unsigned long bootmap_size, start_pfn, lmax_low_pfn;
+ int mem_param; /* user specified memory size in pages */
+ int boot_pfn; /* low pages available for bootmem */
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
@@ -252,7 +262,16 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
- parse_mem_cmdline(cmdline_p);
+ /* The mem= kernel command line param overrides the detected amount
+ * of memory. For xenolinux, if this override is larger than detected
+ * memory, then boot using only detected memory and make provisions to
+ * use all of the override value. The hypervisor can give this
+ * domain more memory later on and it will be added to the free
+ * lists at that time. See claim_new_pages() in
+ * arch/xen/drivers/balloon/balloon.c
+ */
+ mem_param = parse_mem_cmdline(cmdline_p);
+ if (!mem_param) mem_param = start_info.nr_pages;
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
@@ -269,9 +288,9 @@ void __init setup_arch(char **cmdline_p)
/*
* Determine low and high memory ranges:
*/
- max_low_pfn = max_pfn = start_info.nr_pages;
- if (max_low_pfn > MAXMEM_PFN) {
- max_low_pfn = MAXMEM_PFN;
+ lmax_low_pfn = max_pfn = mem_param;
+ if (lmax_low_pfn > MAXMEM_PFN) {
+ lmax_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
/* Maximum memory usable is what is directly addressable */
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
@@ -314,12 +333,20 @@ void __init setup_arch(char **cmdline_p)
* bootstrap page table. We are guaranteed to get >=512kB unused 'padding'
* for our own use after all bootstrap elements (see hypervisor-if.h).
*/
- bootmap_size = init_bootmem(start_pfn, max_low_pfn);
- free_bootmem(0, PFN_PHYS(max_low_pfn));
+ boot_pfn = min((int)start_info.nr_pages,lmax_low_pfn);
+ bootmap_size = init_bootmem(start_pfn,boot_pfn);
+ free_bootmem(0, PFN_PHYS(boot_pfn));
reserve_bootmem(__pa(&_stext),
PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 -
__pa(&_stext));
+ /* init_bootmem() set the global max_low_pfn to boot_pfn. Now max_low_pfn
+ * can be set to the override value.
+ */
+ max_low_pfn = lmax_low_pfn;
+
+
+
#ifdef CONFIG_BLK_DEV_INITRD
if ( start_info.mod_start != 0 )
{
diff --git a/linux-2.4.26-xen-sparse/arch/xen/mm/init.c b/linux-2.4.26-xen-sparse/arch/xen/mm/init.c
index 30a9e45c91..0766deef50 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/mm/init.c
+++ b/linux-2.4.26-xen-sparse/arch/xen/mm/init.c
@@ -219,11 +219,17 @@ static void __init pagetable_init (void)
pmd_t *kpmd, *pmd;
pte_t *kpte, *pte, *pte_base;
+ /* create tables only for boot_pfn frames. max_low_pfn may be sized for
+ * pages yet to be allocated from the hypervisor, or it may be set
+ * to override the start_info amount of memory
+ */
+ int boot_pfn = min(start_info.nr_pages,max_low_pfn);
+
/*
* This can be zero as well - no problem, in that case we exit
* the loops anyway due to the PTRS_PER_* conditions.
*/
- end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
+ end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
pgd_base = init_mm.pgd;
i = __pgd_offset(PAGE_OFFSET);
@@ -308,7 +314,6 @@ void __init paging_init(void)
pagetable_init();
zone_sizes_init();
-
/* Switch to the real shared_info page, and clear the dummy page. */
set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
@@ -368,11 +373,18 @@ static int __init free_pages_init(void)
#endif
int reservedpages, pfn;
+ /* add only boot_pfn pages of low memory to free list.
+ * max_low_pfn may be sized for
+ * pages yet to be allocated from the hypervisor, or it may be set
+ * to override the start_info amount of memory
+ */
+ int boot_pfn = min(start_info.nr_pages,max_low_pfn);
+
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
reservedpages = 0;
- for (pfn = 0; pfn < max_low_pfn; pfn++) {
+ for (pfn = 0; pfn < boot_pfn ; pfn++) {
/*
* Only count reserved RAM pages
*/
@@ -380,7 +392,7 @@ static int __init free_pages_init(void)
reservedpages++;
}
#ifdef CONFIG_HIGHMEM
- for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+ for (pfn = start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
totalram_pages += totalhigh_pages;
#endif
@@ -460,11 +472,11 @@ void free_initrd_mem(unsigned long start, unsigned long end)
void si_meminfo(struct sysinfo *val)
{
- val->totalram = totalram_pages;
+ val->totalram = max_pfn;
val->sharedram = 0;
val->freeram = nr_free_pages();
val->bufferram = atomic_read(&buffermem_pages);
- val->totalhigh = totalhigh_pages;
+ val->totalhigh = max_pfn-max_low_pfn;
val->freehigh = nr_free_highpages();
val->mem_unit = PAGE_SIZE;
return;
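One visible consequence of the si_meminfo() change above: totalram is now reported as max_pfn, so inside the domain tools like free show the full mem= size even while most of it is still ballooned out; only the free figures reflect pages actually granted by Xen. For example:

dom-1# free -m    # 'total' shows ~2048 for mem=2g, regardless of the current reservation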
diff --git a/tools/misc/setdomainmaxmem b/tools/misc/setdomainmaxmem
new file mode 100644
index 0000000000..4800cff2f3
--- /dev/null
+++ b/tools/misc/setdomainmaxmem
@@ -0,0 +1,34 @@
+#!/usr/bin/env perl
+
+use strict;
+require "sys/ioctl.ph";
+
+sub SIZEOF_HYPERCALL () { 24; }
+sub STRUCT_PRIVCMD_HYPERCALL () {"L P";}
+sub IOCTL_PRIVCMD_HYPERCALL ()
+ { &_IOC( &_IOC_NONE, ord('P'), 0, SIZEOF_HYPERCALL );}
+sub __HYPERVISOR_dom0_op () {7;}
+sub DOM0_INTERFACE_VERSION () {0xaaaa0010;}
+sub DOM0_SETDOMAINMAXMEM () {28;}
+sub STRUCT_DOM0_OP_PREFIX () {"L L";}
+sub STRUCT_SETDOMAINMAXMEM () {STRUCT_DOM0_OP_PREFIX."L x4 L";}
+sub XEN_PRIVCMD () {"/proc/xen/privcmd";}
+
+sub setdomainmaxmem($$) {
+ my ($domain,$bytes) = @_;
+ my $msg = pack(STRUCT_SETDOMAINMAXMEM,DOM0_SETDOMAINMAXMEM,
+ DOM0_INTERFACE_VERSION, $domain, $bytes);
+ my $cmd = pack(STRUCT_PRIVCMD_HYPERCALL,__HYPERVISOR_dom0_op,$msg);
+ open(XEN,XEN_PRIVCMD) or die "$!\n";
+ ioctl(XEN, IOCTL_PRIVCMD_HYPERCALL, $cmd) or die "ioctl: $!";
+ close XEN;
+}
+
+my ($bytes,$suffix) = $ARGV[1] =~ m/(^\d+)([mMkKgG])/;
+$bytes<<=10 if $suffix =~ m/[kK]/;
+$bytes<<=20 if $suffix =~ m/[mM]/;
+$bytes<<=30 if $suffix =~ m/[gG]/;
+
+printf "set domain $ARGV[0] to $bytes\n";
+setdomainmaxmem($ARGV[0],$bytes);
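For reference, the script expects a domain id and a size, and the regex above requires a k, m, or g suffix on the size. Mirroring the commit message example:

dom-0# setdomainmaxmem 1 2g
set domain 1 to 2147483648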
+