 tools/flask/policy/policy/mls                |  4
 tools/flask/policy/policy/modules/xen/xen.if |  6
 tools/flask/policy/policy/modules/xen/xen.te |  2
 xen/common/domain.c                          | 60
 xen/common/domctl.c                          | 17
 xen/common/keyhandler.c                      | 11
 xen/common/sched_credit.c                    | 48
 xen/common/schedule.c                        |  5
 xen/include/public/domctl.h                  | 13
 xen/include/xen/nodemask.h                   | 14
 xen/include/xen/sched-if.h                   |  2
 xen/include/xen/sched.h                      |  9
 xen/xsm/flask/hooks.c                        |  6
 xen/xsm/flask/policy/access_vectors          |  6
 14 files changed, 182 insertions(+), 21 deletions(-)
diff --git a/tools/flask/policy/policy/mls b/tools/flask/policy/policy/mls
index a3dde706b4..9290a76ae9 100644
--- a/tools/flask/policy/policy/mls
+++ b/tools/flask/policy/policy/mls
@@ -70,11 +70,11 @@ mlsconstrain domain transition
(( h1 dom h2 ) and (( l1 eq l2 ) or (t1 == mls_priv)));
# all the domain "read" ops
-mlsconstrain domain { getvcpuaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
+mlsconstrain domain { getaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
((l1 dom l2) or (t1 == mls_priv));
# all the domain "write" ops
-mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setvcpuaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
+mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
((l1 eq l2) or (t1 == mls_priv));
# This is incomplete - similar constraints must be written for all classes
diff --git a/tools/flask/policy/policy/modules/xen/xen.if b/tools/flask/policy/policy/modules/xen/xen.if
index 2ce22125c1..18647c9327 100644
--- a/tools/flask/policy/policy/modules/xen/xen.if
+++ b/tools/flask/policy/policy/modules/xen/xen.if
@@ -48,7 +48,7 @@ define(`create_domain_common', `
allow $1 $2:domain { create max_vcpus setdomainmaxmem setaddrsize
getdomaininfo hypercall setvcpucontext setextvcpucontext
getscheduler getvcpuinfo getvcpuextstate getaddrsize
- getvcpuaffinity setvcpuaffinity };
+ getaffinity setaffinity };
allow $1 $2:domain2 { set_cpuid settsc setscheduler };
allow $1 $2:security check_context;
allow $1 $2:shadow enable;
@@ -77,9 +77,9 @@ define(`create_domain_build_label', `
# manage_domain(priv, target)
# Allow managing a running domain
define(`manage_domain', `
- allow $1 $2:domain { getdomaininfo getvcpuinfo getvcpuaffinity
+ allow $1 $2:domain { getdomaininfo getvcpuinfo getaffinity
getaddrsize pause unpause trigger shutdown destroy
- setvcpuaffinity setdomainmaxmem getscheduler };
+ setaffinity setdomainmaxmem getscheduler };
')
# migrate_domain_out(priv, target)
diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
index 454e27ec71..c89ce28765 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -69,7 +69,7 @@ allow dom0_t xen_t:mmu memorymap;
# Allow dom0 to use these domctls on itself. For domctls acting on other
# domains, see the definitions of create_domain and manage_domain.
allow dom0_t dom0_t:domain {
- setvcpucontext max_vcpus setvcpuaffinity getvcpuaffinity getscheduler
+ setvcpucontext max_vcpus setaffinity getaffinity getscheduler
getdomaininfo getvcpuinfo getvcpucontext setdomainmaxmem setdomainhandle
setdebugging hypercall settime setaddrsize getaddrsize trigger
getextvcpucontext setextvcpucontext getvcpuextstate setvcpuextstate
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 590548e101..ce45d66b45 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -224,6 +224,7 @@ struct domain *domain_create(
spin_lock_init(&d->node_affinity_lock);
d->node_affinity = NODE_MASK_ALL;
+ d->auto_node_affinity = 1;
spin_lock_init(&d->shutdown_lock);
d->shutdown_code = -1;
@@ -364,11 +365,32 @@ void domain_update_node_affinity(struct domain *d)
cpumask_or(cpumask, cpumask, online_affinity);
}
- for_each_online_node ( node )
- if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
- node_set(node, nodemask);
+ if ( d->auto_node_affinity )
+ {
+ /* Node-affinity is automatically computed from all vcpu-affinities */
+ for_each_online_node ( node )
+ if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
+ node_set(node, nodemask);
+
+ d->node_affinity = nodemask;
+ }
+ else
+ {
+ /* Node-affinity is provided by someone else; just filter out cpus
+ * that are either offline or not in the affinity of any vcpu. */
+ nodemask = d->node_affinity;
+ for_each_node_mask ( node, d->node_affinity )
+ if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) )
+ node_clear(node, nodemask);
+
+ /* Avoid losing track of the node-affinity just because a bad
+ * vcpu-affinity has been specified. */
+ if ( !nodes_empty(nodemask) )
+ d->node_affinity = nodemask;
+ }
+
+ sched_set_node_affinity(d, &d->node_affinity);
- d->node_affinity = nodemask;
spin_unlock(&d->node_affinity_lock);
free_cpumask_var(online_affinity);
@@ -376,6 +398,36 @@ void domain_update_node_affinity(struct domain *d)
}
+int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
+{
+ /* Being affine to no nodes is just wrong */
+ if ( nodes_empty(*affinity) )
+ return -EINVAL;
+
+ spin_lock(&d->node_affinity_lock);
+
+ /*
+ * Being/becoming explicitly affine to all nodes is not particularly
+ * useful. Let's take it as the `reset node affinity` command.
+ */
+ if ( nodes_full(*affinity) )
+ {
+ d->auto_node_affinity = 1;
+ goto out;
+ }
+
+ d->auto_node_affinity = 0;
+ d->node_affinity = *affinity;
+
+out:
+ spin_unlock(&d->node_affinity_lock);
+
+ domain_update_node_affinity(d);
+
+ return 0;
+}
+
+
struct domain *get_domain_by_id(domid_t dom)
{
struct domain *d;
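
The hunks above give domain_set_node_affinity() three distinct behaviours: an empty mask is rejected with -EINVAL, a full mask is taken as "reset to automatic", and anything in between pins the node-affinity explicitly (later filtered against online cpus and vcpu-affinities by domain_update_node_affinity()). A minimal in-hypervisor sketch of those cases follows; the calling function is hypothetical and only uses symbols already present in this patch or in xen/include/xen/nodemask.h (NODE_MASK_NONE, NODE_MASK_ALL):

    /* Hypothetical caller, for illustration of the new semantics only. */
    static void check_node_affinity_semantics(struct domain *d)
    {
        nodemask_t mask = NODE_MASK_NONE;

        /* Empty mask: rejected, auto_node_affinity left untouched. */
        BUG_ON(domain_set_node_affinity(d, &mask) != -EINVAL);

        /* Affinity to node 0 only: node-affinity is now explicit. */
        node_set(0, mask);
        if ( domain_set_node_affinity(d, &mask) == 0 )
            ASSERT(!d->auto_node_affinity);

        /* Full mask: treated as `reset node affinity', back to automatic. */
        mask = NODE_MASK_ALL;
        if ( domain_set_node_affinity(d, &mask) == 0 )
            ASSERT(d->auto_node_affinity);
    }
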
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 37532c25c6..6bd8efdf06 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -560,6 +560,23 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
}
break;
+ case XEN_DOMCTL_setnodeaffinity:
+ {
+ nodemask_t new_affinity;
+
+ ret = xenctl_bitmap_to_nodemask(&new_affinity,
+ &op->u.nodeaffinity.nodemap);
+ if ( !ret )
+ ret = domain_set_node_affinity(d, &new_affinity);
+ }
+ break;
+ case XEN_DOMCTL_getnodeaffinity:
+ {
+ ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
+ &d->node_affinity);
+ }
+ break;
+
case XEN_DOMCTL_setvcpuaffinity:
case XEN_DOMCTL_getvcpuaffinity:
{
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index e9ef45f462..507213360c 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -218,6 +218,14 @@ static void cpuset_print(char *set, int size, const cpumask_t *mask)
*set++ = '\0';
}
+static void nodeset_print(char *set, int size, const nodemask_t *mask)
+{
+ *set++ = '[';
+ set += nodelist_scnprintf(set, size-2, mask);
+ *set++ = ']';
+ *set++ = '\0';
+}
+
static void periodic_timer_print(char *str, int size, uint64_t period)
{
if ( period == 0 )
@@ -273,6 +281,9 @@ static void dump_domains(unsigned char key)
dump_pageframe_info(d);
+ nodeset_print(tmpstr, sizeof(tmpstr), &d->node_affinity);
+ printk("NODE affinity for domain %d: %s\n", d->domain_id, tmpstr);
+
printk("VCPU information and callbacks for domain %u:\n",
d->domain_id);
for_each_vcpu ( d, v )
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index be0b4b0c6c..318831054d 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -261,17 +261,50 @@ __runq_remove(struct csched_vcpu *svc)
list_del_init(&svc->runq_elem);
}
+/*
+ * Translates a node-affinity mask into a cpumask, so that it can be used
+ * during actual scheduling. The result contains all the cpus of all the
+ * nodes set in the original node-affinity mask.
+ *
+ * Note that any serialization needed to access the mask safely is entirely
+ * the responsibility of the caller of this function/hook.
+ */
+static void csched_set_node_affinity(
+ const struct scheduler *ops,
+ struct domain *d,
+ nodemask_t *mask)
+{
+ struct csched_dom *sdom;
+ int node;
+
+ /* Skip idle domain since it doesn't even have a node_affinity_cpumask */
+ if ( unlikely(is_idle_domain(d)) )
+ return;
+
+ sdom = CSCHED_DOM(d);
+ cpumask_clear(sdom->node_affinity_cpumask);
+ for_each_node_mask( node, *mask )
+ cpumask_or(sdom->node_affinity_cpumask, sdom->node_affinity_cpumask,
+ &node_to_cpumask(node));
+}
+
#define for_each_csched_balance_step(step) \
for ( (step) = 0; (step) <= CSCHED_BALANCE_CPU_AFFINITY; (step)++ )
/*
* vcpu-affinity balancing is always necessary and must never be skipped.
- * OTOH, if a domain's node-affinity spans all the nodes, we can safely
- * avoid dealing with node-affinity entirely.
+ * OTOH, if a domain's node-affinity is automatically computed (or if it
+ * just spans all the nodes), we can safely avoid dealing with
+ * node-affinity entirely. Node-affinity is also deemed meaningless when
+ * it has an empty intersection with the vcpu's vcpu-affinity, as that
+ * would mean trying to schedule the vcpu on _no_ pcpu!
*/
-#define __vcpu_has_node_affinity(vc) \
- ( !cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) )
+#define __vcpu_has_node_affinity(vc) \
+ ( !(cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
+ || !cpumask_intersects(vc->cpu_affinity, \
+ CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
+ || vc->domain->auto_node_affinity == 1) )
/*
* Each csched-balance step uses its own cpumask. This function determines
@@ -284,8 +317,13 @@ static void
csched_balance_cpumask(const struct vcpu *vc, int step, cpumask_t *mask)
{
if ( step == CSCHED_BALANCE_NODE_AFFINITY )
+ {
cpumask_and(mask, CSCHED_DOM(vc->domain)->node_affinity_cpumask,
vc->cpu_affinity);
+
+ if ( unlikely(cpumask_empty(mask)) )
+ cpumask_copy(mask, vc->cpu_affinity);
+ }
else /* step == CSCHED_BALANCE_CPU_AFFINITY */
cpumask_copy(mask, vc->cpu_affinity);
}
@@ -1898,6 +1936,8 @@ const struct scheduler sched_credit_def = {
.adjust = csched_dom_cntl,
.adjust_global = csched_sys_cntl,
+ .set_node_affinity = csched_set_node_affinity,
+
.pick_cpu = csched_cpu_pick,
.do_schedule = csched_schedule,
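
The consumers of __vcpu_has_node_affinity() and csched_balance_cpumask() (the pick-cpu and work-stealing paths) are not touched by this hunk. Below is a sketch of how the two balancing steps are meant to be iterated, assuming the loop shape that for_each_csched_balance_step() suggests; the function itself is illustrative, not part of the patch:

    /* Illustrative only: the node-affinity step falls back to vcpu-affinity. */
    static int pick_cpu_from_balanced_mask(struct vcpu *vc)
    {
        cpumask_t cpus;
        int step;

        for_each_csched_balance_step( step )
        {
            /* Skip the node-affinity step when it cannot restrict anything. */
            if ( step == CSCHED_BALANCE_NODE_AFFINITY
                 && !__vcpu_has_node_affinity(vc) )
                continue;

            csched_balance_cpumask(vc, step, &cpus);

            /* ...look for a suitable (e.g. idle) pcpu within "cpus"... */
            if ( !cpumask_empty(&cpus) )
                return cpumask_first(&cpus);
        }

        return -1;
    }
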
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 7364ff8a7d..c1cd3d0f15 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -638,6 +638,11 @@ int cpu_disable_scheduler(unsigned int cpu)
return ret;
}
+void sched_set_node_affinity(struct domain *d, nodemask_t *mask)
+{
+ SCHED_OP(DOM2OP(d), set_node_affinity, d, mask);
+}
+
int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity)
{
cpumask_t online_affinity;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 9f5ed48a0e..4c5b2bbbbd 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -280,6 +280,16 @@ typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+/* Get/set the NUMA node(s) with which the guest has affinity. */
+/* XEN_DOMCTL_setnodeaffinity */
+/* XEN_DOMCTL_getnodeaffinity */
+struct xen_domctl_nodeaffinity {
+ struct xenctl_bitmap nodemap;/* IN */
+};
+typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
+
+
/* Get/set which physical cpus a vcpu can execute on. */
/* XEN_DOMCTL_setvcpuaffinity */
/* XEN_DOMCTL_getvcpuaffinity */
@@ -908,6 +918,8 @@ struct xen_domctl {
#define XEN_DOMCTL_audit_p2m 65
#define XEN_DOMCTL_set_virq_handler 66
#define XEN_DOMCTL_set_broken_page_p2m 67
+#define XEN_DOMCTL_setnodeaffinity 68
+#define XEN_DOMCTL_getnodeaffinity 69
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -921,6 +933,7 @@ struct xen_domctl {
struct xen_domctl_getpageframeinfo getpageframeinfo;
struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
struct xen_domctl_getpageframeinfo3 getpageframeinfo3;
+ struct xen_domctl_nodeaffinity nodeaffinity;
struct xen_domctl_vcpuaffinity vcpuaffinity;
struct xen_domctl_shadow_op shadow_op;
struct xen_domctl_max_mem max_mem;
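
From the toolstack, the natural consumer of these two sub-ops is the separate libxc patch in this series. A sketch of driving XEN_DOMCTL_setnodeaffinity through that interface, assuming it provides xc_domain_node_setaffinity(), xc_get_max_nodes() and the xc_nodemap_t byte-array type (none of which are defined in this hunk):

    #include <stdlib.h>
    #include <xenctrl.h>

    /* Hypothetical helper: restrict a domain's node-affinity to NUMA node 0. */
    static int pin_domain_to_node0(xc_interface *xch, uint32_t domid)
    {
        int nr_nodes = xc_get_max_nodes(xch);     /* assumed libxc helper */
        xc_nodemap_t nodemap;
        int rc;

        if ( nr_nodes <= 0 )
            return -1;

        nodemap = calloc(1, (nr_nodes + 7) / 8);  /* one bit per node */
        if ( !nodemap )
            return -1;

        nodemap[0] |= 1;                          /* bit 0: node 0 */
        rc = xc_domain_node_setaffinity(xch, domid, nodemap);

        free(nodemap);
        return rc;
    }

Passing a map with all bits set would, per the hypervisor side above, switch the domain back to automatically computed node-affinity.
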
diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h
index 1b860165ce..2a90dc1aef 100644
--- a/xen/include/xen/nodemask.h
+++ b/xen/include/xen/nodemask.h
@@ -8,8 +8,9 @@
* See detailed comments in the file linux/bitmap.h describing the
* data type on which these nodemasks are based.
*
- * For details of nodemask_scnprintf() and nodemask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of nodemask_scnprintf(), nodelist_scnprintf() and
+ * nodemask_parse(), see bitmap_scnprintf() and bitmap_parse()
+ * in lib/bitmap.c.
*
* The available nodemask operations are:
*
@@ -50,6 +51,7 @@
* unsigned long *nodes_addr(mask) Array of unsigned long's in mask
*
* int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
+ * int nodelist_scnprintf(buf, len, mask) Format nodemask as a list for printing
* int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask
*
* for_each_node_mask(node, mask) for-loop node over mask
@@ -292,6 +294,14 @@ static inline int __cycle_node(int n, const nodemask_t *maskp, int nbits)
#define nodes_addr(src) ((src).bits)
+#define nodelist_scnprintf(buf, len, src) \
+ __nodelist_scnprintf((buf), (len), (src), MAX_NUMNODES)
+static inline int __nodelist_scnprintf(char *buf, int len,
+ const nodemask_t *srcp, int nbits)
+{
+ return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
+}
+
#if 0
#define nodemask_scnprintf(buf, len, src) \
__nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
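
The only difference from the existing (and compiled-out) nodemask_scnprintf() is the output format: a range list rather than a hex mask. For instance, the dump added to keyhandler.c ends up printing something like the following (buffer size and node numbers are just an example):

    char buf[16];
    nodemask_t m = NODE_MASK_NONE;

    node_set(0, m);
    node_set(1, m);
    node_set(3, m);

    nodelist_scnprintf(buf, sizeof(buf), &m);   /* buf now holds "0-1,3" */
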
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 9ace22cb66..2023ea393b 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -184,6 +184,8 @@ struct scheduler {
struct xen_domctl_scheduler_op *);
int (*adjust_global) (const struct scheduler *,
struct xen_sysctl_scheduler_op *);
+ void (*set_node_affinity) (const struct scheduler *,
+ struct domain *, nodemask_t *);
void (*dump_settings) (const struct scheduler *);
void (*dump_cpu_state) (const struct scheduler *, int);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d15d567c9b..ad971d22fe 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -288,6 +288,8 @@ struct domain
/* Does this guest need iommu mappings? */
bool_t need_iommu;
#endif
+ /* Is node-affinity automatically computed? */
+ bool_t auto_node_affinity;
/* Is this guest fully privileged (aka dom0)? */
bool_t is_privileged;
/* Which guest this guest has privileges on */
@@ -365,7 +367,10 @@ struct domain
/* Various mem_events */
struct mem_event_per_domain *mem_event;
- /* Currently computed from union of all vcpu cpu-affinity masks. */
+ /*
+ * Can be specified by the user. If that is not the case, it is
+ * computed from the union of all the vcpu cpu-affinity masks.
+ */
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
@@ -435,6 +440,7 @@ static inline void get_knownalive_domain(struct domain *d)
ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}
+int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);
struct domain *domain_create(
@@ -555,6 +561,7 @@ void sched_destroy_domain(struct domain *d);
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
+void sched_set_node_affinity(struct domain *, nodemask_t *);
int sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 29a78dd06e..247c8a393b 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -611,10 +611,12 @@ static int flask_domctl(struct domain *d, int cmd)
return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__UNPAUSE);
case XEN_DOMCTL_setvcpuaffinity:
- return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__SETVCPUAFFINITY);
+ case XEN_DOMCTL_setnodeaffinity:
+ return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__SETAFFINITY);
case XEN_DOMCTL_getvcpuaffinity:
- return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__GETVCPUAFFINITY);
+ case XEN_DOMCTL_getnodeaffinity:
+ return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__GETAFFINITY);
case XEN_DOMCTL_resumedomain:
return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__RESUME);
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 36cbacfa13..fdfc50245a 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -104,9 +104,11 @@ class domain
# XEN_DOMCTL_destroydomain
destroy
# XEN_DOMCTL_setvcpuaffinity
- setvcpuaffinity
+# XEN_DOMCTL_setnodeaffinity
+ setaffinity
# XEN_DOMCTL_getvcpuaffinity
- getvcpuaffinity
+# XEN_DOMCTL_getnodeaffinity
+ getaffinity
# XEN_DOMCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_getinfo
getscheduler
# XEN_DOMCTL_getdomaininfo, XEN_SYSCTL_getdomaininfolist