aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- tools/libxl/libxl_dom.c 40
-rw-r--r-- tools/libxl/libxl_internal.h 5
-rw-r--r-- tools/libxl/libxl_numa.c 88
3 files changed, 95 insertions, 38 deletions
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index e13fb49f7f..d749983ce7 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -128,26 +128,30 @@ static int numa_cmpf(const libxl__numa_candidate *c1,
}
/* The actual automatic NUMA placement routine */
-static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
+static int numa_place_domain(libxl__gc *gc, uint32_t domid,
+ libxl_domain_build_info *info)
{
int found;
libxl__numa_candidate candidate;
libxl_bitmap candidate_nodemap;
- libxl_cpupoolinfo *pinfo;
- int nr_pools, rc = 0;
+ libxl_cpupoolinfo cpupool_info;
+ int i, cpupool, rc = 0;
uint32_t memkb;
libxl__numa_candidate_init(&candidate);
libxl_bitmap_init(&candidate_nodemap);
- /* First of all, if cpupools are in use, better not to mess with them */
- pinfo = libxl_list_cpupool(CTX, &nr_pools);
- if (!pinfo)
- return ERROR_FAIL;
- if (nr_pools > 1) {
- LOG(NOTICE, "Skipping NUMA placement as cpupools are in use");
- goto out;
- }
+ /*
+ * Extract the cpumap from the cpupool the domain belongs to. In fact,
+ * it only makes sense to consider the cpus/nodes that are in there
+ * for placement.
+ */
+ rc = cpupool = libxl__domain_cpupool(gc, domid);
+ if (rc < 0)
+ return rc;
+ rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
+ if (rc)
+ return rc;
rc = libxl_domain_need_memory(CTX, info, &memkb);
if (rc)
@@ -159,7 +163,8 @@ static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
/* Find the best candidate with enough free memory and at least
* as much pcpus as the domain has vcpus. */
- rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus, 0, 0,
+ rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
+ 0, 0, &cpupool_info.cpumap,
numa_cmpf, &candidate, &found);
if (rc)
goto out;
@@ -175,6 +180,13 @@ static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
if (rc)
goto out;
+ /* Avoid trying to set the affinity to cpus that might be in the
+ * nodemap but not in our cpupool. */
+ libxl_for_each_set_bit(i, info->cpumap) {
+ if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
+ libxl_bitmap_reset(&info->cpumap, i);
+ }
+
LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
"%"PRIu32" KB free selected", candidate.nr_nodes,
candidate.nr_cpus, candidate.free_memkb / 1024);
@@ -182,7 +194,7 @@ static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
out:
libxl__numa_candidate_dispose(&candidate);
libxl_bitmap_dispose(&candidate_nodemap);
- libxl_cpupoolinfo_list_free(pinfo, nr_pools);
+ libxl_cpupoolinfo_dispose(&cpupool_info);
return rc;
}
@@ -214,7 +226,7 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
return ERROR_INVAL;
}
- rc = numa_place_domain(gc, info);
+ rc = numa_place_domain(gc, domid, info);
if (rc)
return rc;
}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index fd1b0cedfa..4938510ec0 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2568,6 +2568,10 @@ typedef int (*libxl__numa_candidate_cmpf)(const libxl__numa_candidate *c1,
* other hand, if not even one single candidate can be found, the function
* still returns successfully but cndt_found will be zero.
*
+ * Finally, suitable_cpumap tells the function that only the cpus in that
+ * mask should be considered when generating placement candidates (for
+ * example because of cpupools).
+ *
* It is up to the function to properly allocate cndt_out (by calling
* libxl__numa_candidate_alloc()), while it is the caller that should init
* (libxl__numa_candidate_init()) and free (libxl__numa_candidate_dispose())
@@ -2576,6 +2580,7 @@ typedef int (*libxl__numa_candidate_cmpf)(const libxl__numa_candidate *c1,
_hidden int libxl__get_numa_candidate(libxl__gc *gc,
uint32_t min_free_memkb, int min_cpus,
int min_nodes, int max_nodes,
+ const libxl_bitmap *suitable_cpumap,
libxl__numa_candidate_cmpf numa_cmpf,
libxl__numa_candidate *cndt_out,
int *cndt_found);
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index d95ed2af09..5301ec4f0c 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -105,30 +105,48 @@ static int comb_next(comb_iter_t it, int n, int k)
/* NUMA automatic placement (see libxl_internal.h for details) */
/*
- * This function turns a k-combination iterator into a node map.
- * This means the bits in the node map corresponding to the indexes
- * of the given combination are the ones that will be set.
- * For example, if the iterator represents the combination { 0, 2, 4},
- * the node map will have bits #0, #2 and #4 set.
+ * This function turns a k-combination iterator into a node map,
+ * given another map, telling us which nodes should be considered.
+ *
+ * This means the bits that are set in suitable_nodemap and that
+ * correspond to the indexes of the given combination are the ones
+ * that will be set in nodemap.
+ *
+ * For example, given a fully set suitable_nodemap, if the iterator
+ * represents the combination { 0, 2, 4}, nodemap will have bits #0,
+ * #2 and #4 set.
+ * On the other hand, if, say, suitable_nodemap=01011011 (bit #0 being the
+ * leftmost), the same iterator will cause bits #1, #4 and #7 of nodemap to be set.
*/
-static void comb_get_nodemap(comb_iter_t it, libxl_bitmap *nodemap, int k)
+static void comb_get_nodemap(comb_iter_t it, libxl_bitmap *suitable_nodemap,
+ libxl_bitmap *nodemap, int k)
{
- int i;
+ int i, m = 0, n = 0;
libxl_bitmap_set_none(nodemap);
- for (i = 0; i < k; i++)
- libxl_bitmap_set(nodemap, it[i]);
+ libxl_for_each_set_bit(i, *suitable_nodemap) {
+ /* Check whether the n-th set bit of suitable_nodemap
+ * matches with the m-th element of the iterator (and,
+ * only if it does, advance to the next one) */
+ if (m < k && n == it[m]) {
+ libxl_bitmap_set(nodemap, i);
+ m++;
+ }
+ n++;
+ }
}
/* Retrieve the number of cpus that the nodes that are part of the nodemap
- * span. */
+ * span and are also set in suitable_cpumap. */
static int nodemap_to_nr_cpus(libxl_cputopology *tinfo, int nr_cpus,
+ const libxl_bitmap *suitable_cpumap,
const libxl_bitmap *nodemap)
{
int i, nodes_cpus = 0;
for (i = 0; i < nr_cpus; i++) {
- if (libxl_bitmap_test(nodemap, tinfo[i].node))
+ if (libxl_bitmap_test(suitable_cpumap, i) &&
+ libxl_bitmap_test(nodemap, tinfo[i].node))
nodes_cpus++;
}
return nodes_cpus;
@@ -242,6 +260,7 @@ static int count_cpus_per_node(libxl_cputopology *tinfo, int nr_cpus,
int libxl__get_numa_candidate(libxl__gc *gc,
uint32_t min_free_memkb, int min_cpus,
int min_nodes, int max_nodes,
+ const libxl_bitmap *suitable_cpumap,
libxl__numa_candidate_cmpf numa_cmpf,
libxl__numa_candidate *cndt_out,
int *cndt_found)
@@ -249,11 +268,12 @@ int libxl__get_numa_candidate(libxl__gc *gc,
libxl__numa_candidate new_cndt;
libxl_cputopology *tinfo = NULL;
libxl_numainfo *ninfo = NULL;
- int nr_nodes = 0, nr_cpus = 0;
- libxl_bitmap nodemap;
+ int nr_nodes = 0, nr_suit_nodes, nr_cpus = 0;
+ libxl_bitmap suitable_nodemap, nodemap;
int rc = 0;
libxl_bitmap_init(&nodemap);
+ libxl_bitmap_init(&suitable_nodemap);
libxl__numa_candidate_init(&new_cndt);
/* Get platform info and prepare the map for testing the combinations */
@@ -300,6 +320,15 @@ int libxl__get_numa_candidate(libxl__gc *gc,
if (rc)
goto out;
+ /* Allocate and prepare the map of the nodes that can be utilized for
+ * placement, based on the map of suitable cpus. */
+ rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap, 0);
+ if (rc)
+ goto out;
+ rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap);
+ if (rc)
+ goto out;
+
/*
* If the minimum number of NUMA nodes is not explicitly specified
* (i.e., min_nodes == 0), we try to figure out a sensible number of nodes
@@ -317,10 +346,14 @@ int libxl__get_numa_candidate(libxl__gc *gc,
else
min_nodes = (min_cpus + cpus_per_node - 1) / cpus_per_node;
}
- if (min_nodes > nr_nodes)
- min_nodes = nr_nodes;
- if (!max_nodes || max_nodes > nr_nodes)
- max_nodes = nr_nodes;
+ /* We also need to be sure we do not exceed the number of
+ * nodes we are allowed to use. */
+ nr_suit_nodes = libxl_bitmap_count_set(&suitable_nodemap);
+
+ if (min_nodes > nr_suit_nodes)
+ min_nodes = nr_suit_nodes;
+ if (!max_nodes || max_nodes > nr_suit_nodes)
+ max_nodes = nr_suit_nodes;
if (min_nodes > max_nodes) {
LOG(ERROR, "Inconsistent minimum or maximum number of guest nodes");
rc = ERROR_INVAL;
@@ -353,12 +386,16 @@ int libxl__get_numa_candidate(libxl__gc *gc,
* amount of free memory and number of cpus) and it can concur to
* become our best placement iff it passes the check.
*/
- for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes); comb_ok;
- comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) {
+ for (comb_ok = comb_init(gc, &comb_iter, nr_suit_nodes, min_nodes);
+ comb_ok;
+ comb_ok = comb_next(comb_iter, nr_suit_nodes, min_nodes)) {
uint32_t nodes_free_memkb;
int nodes_cpus;
- comb_get_nodemap(comb_iter, &nodemap, min_nodes);
+ /* Get the nodemap for the combination, only considering
+ * suitable nodes. */
+ comb_get_nodemap(comb_iter, &suitable_nodemap,
+ &nodemap, min_nodes);
/* If there is not enough memory in this combination, skip it
* and go generating the next one... */
@@ -367,7 +404,8 @@ int libxl__get_numa_candidate(libxl__gc *gc,
continue;
/* And the same applies if this combination is short in cpus */
- nodes_cpus = nodemap_to_nr_cpus(tinfo, nr_cpus, &nodemap);
+ nodes_cpus = nodemap_to_nr_cpus(tinfo, nr_cpus, suitable_cpumap,
+ &nodemap);
if (min_cpus && nodes_cpus < min_cpus)
continue;
@@ -378,7 +416,7 @@ int libxl__get_numa_candidate(libxl__gc *gc,
libxl__numa_candidate_put_nodemap(gc, &new_cndt, &nodemap);
new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, tinfo, &nodemap);
new_cndt.free_memkb = nodes_free_memkb;
- new_cndt.nr_nodes = min_nodes;
+ new_cndt.nr_nodes = libxl_bitmap_count_set(&nodemap);
new_cndt.nr_cpus = nodes_cpus;
/*
@@ -392,8 +430,9 @@ int libxl__get_numa_candidate(libxl__gc *gc,
LOG(DEBUG, "New best NUMA placement candidate found: "
"nr_nodes=%d, nr_cpus=%d, nr_vcpus=%d, "
- "free_memkb=%"PRIu32"", min_nodes, new_cndt.nr_cpus,
- new_cndt.nr_vcpus, new_cndt.free_memkb / 1024);
+ "free_memkb=%"PRIu32"", new_cndt.nr_nodes,
+ new_cndt.nr_cpus, new_cndt.nr_vcpus,
+ new_cndt.free_memkb / 1024);
libxl__numa_candidate_put_nodemap(gc, cndt_out, &nodemap);
cndt_out->nr_vcpus = new_cndt.nr_vcpus;
@@ -413,6 +452,7 @@ int libxl__get_numa_candidate(libxl__gc *gc,
out:
libxl_bitmap_dispose(&nodemap);
+ libxl_bitmap_dispose(&suitable_nodemap);
libxl__numa_candidate_dispose(&new_cndt);
libxl_numainfo_list_free(ninfo, nr_nodes);
libxl_cputopology_list_free(tinfo, nr_cpus);