author	Keir Fraser <keir.fraser@citrix.com>	2010-08-04 15:35:28 +0100
committer	Keir Fraser <keir.fraser@citrix.com>	2010-08-04 15:35:28 +0100
commit	91c6005e01321a07b0ea8ecc5a3066ba70389e47 (patch)
tree	5f6d52a7e734aab495c0b7d01eeb832317701b8b /xen/common/page_alloc.c
parent	6044f6fa8d0c4f1ddf42eb18d91f4e4044d83cae (diff)
download	xen-91c6005e01321a07b0ea8ecc5a3066ba70389e47.tar.gz
xen-91c6005e01321a07b0ea8ecc5a3066ba70389e47.tar.bz2
xen-91c6005e01321a07b0ea8ecc5a3066ba70389e47.zip
numa: Attempt more efficient NUMA allocation in hypervisor by default.
1. Try to allocate from nodes containing CPUs which a guest can be scheduled on.
2. Remember which node we allocated from last, and round-robin allocations among above-mentioned nodes.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
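For illustration, below is a minimal standalone C sketch (not the hypervisor code itself) of the round-robin step described above: starting after the node used for the previous allocation, pick the next node set in the guest's affinity mask, wrapping around to the first set node when the end of the mask is reached. MAX_NUMNODES, the plain-integer bitmask and the helper names here are simplified stand-ins for Xen's nodemask_t, next_node() and first_node().

/*
 * Standalone sketch only -- simplified stand-ins for Xen's nodemask_t,
 * next_node() and first_node(); not the hypervisor implementation.
 */
#include <stdio.h>

#define MAX_NUMNODES 8

/* First node set in 'mask' at or above 'start'; MAX_NUMNODES if none. */
static unsigned int next_set_node(unsigned int start, unsigned int mask)
{
    unsigned int n;

    for ( n = start; n < MAX_NUMNODES; n++ )
        if ( mask & (1u << n) )
            return n;
    return MAX_NUMNODES;
}

/* Round-robin pick: next node in 'affinity' after 'last', wrapping around. */
static unsigned int pick_node(unsigned int last, unsigned int affinity)
{
    unsigned int node = next_set_node(last + 1, affinity);

    if ( node >= MAX_NUMNODES )      /* ran off the end: wrap to first set node */
        node = next_set_node(0, affinity);
    return node;
}

int main(void)
{
    unsigned int affinity = 0x0b;    /* guest may run on nodes 0, 1 and 3 */
    unsigned int last = 1;           /* node used for the previous allocation */
    int i;

    /* Successive allocations cycle 3 -> 0 -> 1 -> 3 ... */
    for ( i = 0; i < 4; i++ )
    {
        last = pick_node(last, affinity);
        printf("allocate from node %u\n", last);
    }
    return 0;
}

In the patch itself the chosen node is recorded in d->last_alloc_node while the heap lock is still held, so subsequent allocations for the same domain continue the rotation from that point.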
Diffstat (limited to 'xen/common/page_alloc.c')
-rw-r--r--	xen/common/page_alloc.c	68
1 file changed, 47 insertions, 21 deletions
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 1588a2a092..9aa621a24a 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -295,20 +295,29 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
unsigned int zone_lo, unsigned int zone_hi,
- unsigned int node, unsigned int order, unsigned int memflags)
+ unsigned int order, unsigned int memflags,
+ struct domain *d)
{
- unsigned int i, j, zone = 0;
- unsigned int num_nodes = num_online_nodes();
+ unsigned int first_node, i, j, zone = 0, nodemask_retry = 0;
+ unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1);
unsigned long request = 1UL << order;
- bool_t exact_node_request = !!(memflags & MEMF_exact_node);
cpumask_t extra_cpus_mask, mask;
struct page_info *pg;
+ nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
if ( node == NUMA_NO_NODE )
{
- node = cpu_to_node(smp_processor_id());
- exact_node_request = 0;
+ memflags &= ~MEMF_exact_node;
+ if ( d != NULL )
+ {
+ node = next_node(d->last_alloc_node, nodemask);
+ if ( node >= MAX_NUMNODES )
+ node = first_node(nodemask);
+ }
+ if ( node >= MAX_NUMNODES )
+ node = cpu_to_node(smp_processor_id());
}
+ first_node = node;
ASSERT(node >= 0);
ASSERT(zone_lo <= zone_hi);
@@ -335,7 +344,7 @@ static struct page_info *alloc_heap_pages(
* zone before failing, only calc new node value if we fail to find memory
* in target node, this avoids needless computation on fast-path.
*/
- for ( i = 0; i < num_nodes; i++ )
+ for ( ; ; )
{
zone = zone_hi;
do {
@@ -349,18 +358,35 @@ static struct page_info *alloc_heap_pages(
goto found;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
- if ( exact_node_request )
+ if ( memflags & MEMF_exact_node )
goto not_found;
- /* Pick next node, wrapping around if needed. */
- node = next_node(node, node_online_map);
- if (node == MAX_NUMNODES)
- node = first_node(node_online_map);
+ /* Pick next node. */
+ if ( !node_isset(node, nodemask) )
+ {
+ /* Very first node may be caller-specified and outside nodemask. */
+ ASSERT(!nodemask_retry);
+ first_node = node = first_node(nodemask);
+ if ( node < MAX_NUMNODES )
+ continue;
+ }
+ else if ( (node = next_node(node, nodemask)) >= MAX_NUMNODES )
+ node = first_node(nodemask);
+ if ( node == first_node )
+ {
+ /* When we have tried all in nodemask, we fall back to others. */
+ if ( nodemask_retry++ )
+ goto not_found;
+ nodes_andnot(nodemask, node_online_map, nodemask);
+ first_node = node = first_node(nodemask);
+ if ( node >= MAX_NUMNODES )
+ goto not_found;
+ }
}
try_tmem:
/* Try to free memory from tmem */
- if ( (pg = tmem_relinquish_pages(order,memflags)) != NULL )
+ if ( (pg = tmem_relinquish_pages(order, memflags)) != NULL )
{
/* reassigning an already allocated anonymous heap page */
spin_unlock(&heap_lock);
@@ -386,6 +412,9 @@ static struct page_info *alloc_heap_pages(
total_avail_pages -= request;
ASSERT(total_avail_pages >= 0);
+ if ( d != NULL )
+ d->last_alloc_node = node;
+
spin_unlock(&heap_lock);
cpus_clear(mask);
@@ -1010,7 +1039,7 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
ASSERT(!in_irq());
pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN,
- cpu_to_node(smp_processor_id()), order, memflags);
+ order, memflags, NULL);
if ( unlikely(pg == NULL) )
return NULL;
@@ -1153,24 +1182,21 @@ struct page_info *alloc_domheap_pages(
{
struct page_info *pg = NULL;
unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
- unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1), dma_zone;
+ unsigned int dma_zone;
ASSERT(!in_irq());
- if ( (node == NUMA_NO_NODE) && (d != NULL) )
- node = domain_to_node(d);
-
bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 )
return NULL;
if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) )
- pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order, memflags);
+ pg = alloc_heap_pages(dma_zone + 1, zone_hi, order, memflags, d);
if ( (pg == NULL) &&
((memflags & MEMF_no_dma) ||
- ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
- node, order, memflags)) == NULL)) )
+ ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, order,
+ memflags, d)) == NULL)) )
return NULL;
if ( (d != NULL) && assign_pages(d, pg, order, memflags) )