(( h1 dom h2 ) and (( l1 eq l2 ) or (t1 == mls_priv)));
# all the domain "read" ops
-mlsconstrain domain { getvcpuaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
+mlsconstrain domain { getaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
((l1 dom l2) or (t1 == mls_priv));
# all the domain "write" ops
-mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setvcpuaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
+mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
((l1 eq l2) or (t1 == mls_priv));
# This is incomplete - similar constraints must be written for all classes
allow $1 $2:domain { create max_vcpus setdomainmaxmem setaddrsize
getdomaininfo hypercall setvcpucontext setextvcpucontext
getscheduler getvcpuinfo getvcpuextstate getaddrsize
- getvcpuaffinity setvcpuaffinity };
+ getaffinity setaffinity };
allow $1 $2:domain2 { set_cpuid settsc setscheduler };
allow $1 $2:security check_context;
allow $1 $2:shadow enable;
# manage_domain(priv, target)
# Allow managing a running domain
define(`manage_domain', `
- allow $1 $2:domain { getdomaininfo getvcpuinfo getvcpuaffinity
+ allow $1 $2:domain { getdomaininfo getvcpuinfo getaffinity
getaddrsize pause unpause trigger shutdown destroy
- setvcpuaffinity setdomainmaxmem getscheduler };
+ setaffinity setdomainmaxmem getscheduler };
')
# migrate_domain_out(priv, target)
# Allow dom0 to use these domctls on itself. For domctls acting on other
# domains, see the definitions of create_domain and manage_domain.
allow dom0_t dom0_t:domain {
- setvcpucontext max_vcpus setvcpuaffinity getvcpuaffinity getscheduler
+ setvcpucontext max_vcpus setaffinity getaffinity getscheduler
getdomaininfo getvcpuinfo getvcpucontext setdomainmaxmem setdomainhandle
setdebugging hypercall settime setaddrsize getaddrsize trigger
getextvcpucontext setextvcpucontext getvcpuextstate setvcpuextstate
spin_lock_init(&d->node_affinity_lock);
d->node_affinity = NODE_MASK_ALL;
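+    /* Node-affinity starts out as automatically computed from vcpu-affinities. */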
+ d->auto_node_affinity = 1;
spin_lock_init(&d->shutdown_lock);
d->shutdown_code = -1;
cpumask_or(cpumask, cpumask, online_affinity);
}
- for_each_online_node ( node )
- if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
- node_set(node, nodemask);
+ if ( d->auto_node_affinity )
+ {
+        /* Node-affinity is automatically computed from all vcpu-affinities */
+ for_each_online_node ( node )
+ if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
+ node_set(node, nodemask);
+
+ d->node_affinity = nodemask;
+ }
+ else
+ {
+        /* Node-affinity was set explicitly: just filter out the nodes that have
+         * no cpu which is both online and in the cpu-affinity of some vcpu. */
+ nodemask = d->node_affinity;
+ for_each_node_mask ( node, d->node_affinity )
+ if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) )
+                node_clear(node, nodemask);
+
+        /* Avoid losing track of node-affinity in case a bad
+         * vcpu-affinity has been specified. */
+ if ( !nodes_empty(nodemask) )
+ d->node_affinity = nodemask;
+ }
+
+ sched_set_node_affinity(d, &d->node_affinity);
- d->node_affinity = nodemask;
spin_unlock(&d->node_affinity_lock);
free_cpumask_var(online_affinity);
}
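As a concrete (hypothetical) illustration of the filtering branch above: if a domain was given node-affinity {0,2} but all of its vcpus are pinned to pcpus of node 0, the loop clears node 2 and d->node_affinity becomes {0}; if the pinning had left no online pcpu on any node of the mask, nodemask would end up empty and the previously set node-affinity is kept instead.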
+int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
+{
+ /* Being affine with no nodes is just wrong */
+ if ( nodes_empty(*affinity) )
+ return -EINVAL;
+
+ spin_lock(&d->node_affinity_lock);
+
+ /*
+ * Being/becoming explicitly affine to all nodes is not particularly
+ * useful. Let's take it as the `reset node affinity` command.
+ */
+ if ( nodes_full(*affinity) )
+ {
+ d->auto_node_affinity = 1;
+ goto out;
+ }
+
+ d->auto_node_affinity = 0;
+ d->node_affinity = *affinity;
+
+out:
+ spin_unlock(&d->node_affinity_lock);
+
+ domain_update_node_affinity(d);
+
+ return 0;
+}
+
struct domain *get_domain_by_id(domid_t dom)
{
struct domain *d;
}
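For illustration only (not part of the patch), this is how a hypothetical in-hypervisor caller holding a valid struct domain pointer d could exercise the new interface; NODE_MASK_NONE, NODE_MASK_ALL and node_set() are the existing nodemask.h helpers:

    nodemask_t nm = NODE_MASK_NONE;

    node_set(0, nm);
    node_set(1, nm);
    /* Explicit node-affinity (nodes 0-1); an empty mask would yield -EINVAL. */
    if ( domain_set_node_affinity(d, &nm) )
        printk("invalid node-affinity for domain %d\n", d->domain_id);

    /* A full mask is taken as "go back to automatic node-affinity". */
    nm = NODE_MASK_ALL;
    domain_set_node_affinity(d, &nm);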
break;
+ case XEN_DOMCTL_setnodeaffinity:
+ {
+ nodemask_t new_affinity;
+
+ ret = xenctl_bitmap_to_nodemask(&new_affinity,
+ &op->u.nodeaffinity.nodemap);
+ if ( !ret )
+ ret = domain_set_node_affinity(d, &new_affinity);
+ }
+ break;
+ case XEN_DOMCTL_getnodeaffinity:
+ {
+ ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
+ &d->node_affinity);
+ }
+ break;
+
case XEN_DOMCTL_setvcpuaffinity:
case XEN_DOMCTL_getvcpuaffinity:
{
*set++ = '\0';
}
+static void nodeset_print(char *set, int size, const nodemask_t *mask)
+{
+ *set++ = '[';
+ set += nodelist_scnprintf(set, size-2, mask);
+ *set++ = ']';
+ *set++ = '\0';
+}
+
static void periodic_timer_print(char *str, int size, uint64_t period)
{
if ( period == 0 )
dump_pageframe_info(d);
+ nodeset_print(tmpstr, sizeof(tmpstr), &d->node_affinity);
+ printk("NODE affinity for domain %d: %s\n", d->domain_id, tmpstr);
+
printk("VCPU information and callbacks for domain %u:\n",
d->domain_id);
for_each_vcpu ( d, v )
list_del_init(&svc->runq_elem);
}
+/*
+ * Translate a node-affinity mask into a cpumask, so that it can be used
+ * during actual scheduling. The resulting mask contains all the cpus of all
+ * the nodes set in the original node-affinity mask.
+ *
+ * Note that any serialization needed to access the mask safely is the sole
+ * responsibility of the caller of this function/hook.
+ */
+static void csched_set_node_affinity(
+ const struct scheduler *ops,
+ struct domain *d,
+ nodemask_t *mask)
+{
+ struct csched_dom *sdom;
+ int node;
+
+ /* Skip idle domain since it doesn't even have a node_affinity_cpumask */
+ if ( unlikely(is_idle_domain(d)) )
+ return;
+
+ sdom = CSCHED_DOM(d);
+ cpumask_clear(sdom->node_affinity_cpumask);
+ for_each_node_mask( node, *mask )
+ cpumask_or(sdom->node_affinity_cpumask, sdom->node_affinity_cpumask,
+ &node_to_cpumask(node));
+}
+
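For example, on a hypothetical two-node box with pcpus 0-3 on node 0 and pcpus 4-7 on node 1, a node-affinity mask containing only node 1 makes the loop above set node_affinity_cpumask to pcpus 4-7, which is then the set of pcpus the credit scheduler's node-affinity balancing step will prefer for this domain's vcpus.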
#define for_each_csched_balance_step(step) \
for ( (step) = 0; (step) <= CSCHED_BALANCE_CPU_AFFINITY; (step)++ )
/*
* vcpu-affinity balancing is always necessary and must never be skipped.
- * OTOH, if a domain's node-affinity spans all the nodes, we can safely
- * avoid dealing with node-affinity entirely.
+ * OTOH, if a domain's node-affinity is automatically computed (or if it
+ * just spans all the nodes), we can safely avoid dealing with node-affinity
+ * entirely. Node-affinity is also deemed meaningless when it has an empty
+ * intersection with the vcpu's vcpu-affinity, as that would mean trying to
+ * schedule the vcpu on _no_ pcpu!
*/
-#define __vcpu_has_node_affinity(vc) \
- ( !cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) )
+#define __vcpu_has_node_affinity(vc) \
+ ( !(cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
+ || !cpumask_intersects(vc->cpu_affinity, \
+ CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
+ || vc->domain->auto_node_affinity == 1) )
/*
* Each csched-balance step uses its own cpumask. This function determines
csched_balance_cpumask(const struct vcpu *vc, int step, cpumask_t *mask)
{
if ( step == CSCHED_BALANCE_NODE_AFFINITY )
+ {
cpumask_and(mask, CSCHED_DOM(vc->domain)->node_affinity_cpumask,
vc->cpu_affinity);
+
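+        /* If node-affinity and vcpu-affinity do not intersect, the resulting
+         * mask would be empty: fall back to plain vcpu-affinity for this step. */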
+ if ( unlikely(cpumask_empty(mask)) )
+ cpumask_copy(mask, vc->cpu_affinity);
+ }
else /* step == CSCHED_BALANCE_CPU_AFFINITY */
cpumask_copy(mask, vc->cpu_affinity);
}
.adjust = csched_dom_cntl,
.adjust_global = csched_sys_cntl,
+ .set_node_affinity = csched_set_node_affinity,
+
.pick_cpu = csched_cpu_pick,
.do_schedule = csched_schedule,
return ret;
}
+void sched_set_node_affinity(struct domain *d, nodemask_t *mask)
+{
+ SCHED_OP(DOM2OP(d), set_node_affinity, d, mask);
+}
+
int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity)
{
cpumask_t online_affinity;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+/* Get/set the NUMA node(s) with which the guest has affinity. */
+/* XEN_DOMCTL_setnodeaffinity */
+/* XEN_DOMCTL_getnodeaffinity */
+struct xen_domctl_nodeaffinity {
+ struct xenctl_bitmap nodemap;/* IN */
+};
+typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
+
+
/* Get/set which physical cpus a vcpu can execute on. */
/* XEN_DOMCTL_setvcpuaffinity */
/* XEN_DOMCTL_getvcpuaffinity */
#define XEN_DOMCTL_audit_p2m 65
#define XEN_DOMCTL_set_virq_handler 66
#define XEN_DOMCTL_set_broken_page_p2m 67
+#define XEN_DOMCTL_setnodeaffinity 68
+#define XEN_DOMCTL_getnodeaffinity 69
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
struct xen_domctl_getpageframeinfo getpageframeinfo;
struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
struct xen_domctl_getpageframeinfo3 getpageframeinfo3;
+ struct xen_domctl_nodeaffinity nodeaffinity;
struct xen_domctl_vcpuaffinity vcpuaffinity;
struct xen_domctl_shadow_op shadow_op;
struct xen_domctl_max_mem max_mem;
* See detailed comments in the file linux/bitmap.h describing the
* data type on which these nodemasks are based.
*
- * For details of nodemask_scnprintf() and nodemask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of nodemask_scnprintf(), nodelist_scnprintf() and
+ * nodemask_parse(), see bitmap_scnprintf() and bitmap_parse()
+ * in lib/bitmap.c.
*
* The available nodemask operations are:
*
* unsigned long *nodes_addr(mask) Array of unsigned long's in mask
*
* int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
+ * int nodelist_scnprintf(buf, len, mask) Format nodemask as a list for printing
* int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask
*
* for_each_node_mask(node, mask) for-loop node over mask
#define nodes_addr(src) ((src).bits)
+#define nodelist_scnprintf(buf, len, src) \
+ __nodelist_scnprintf((buf), (len), (src), MAX_NUMNODES)
+static inline int __nodelist_scnprintf(char *buf, int len,
+ const nodemask_t *srcp, int nbits)
+{
+ return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
+}
+
#if 0
#define nodemask_scnprintf(buf, len, src) \
__nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
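As a minimal illustration (not part of the patch) of the new helper, with an arbitrary 64-byte buffer and a mask having nodes 0, 1 and 3 set, the list format produced by bitmap_scnlistprintf() would be "0-1,3":

    char buf[64];
    nodemask_t nm = NODE_MASK_NONE;

    node_set(0, nm);
    node_set(1, nm);
    node_set(3, nm);
    nodelist_scnprintf(buf, sizeof(buf), &nm);   /* buf now holds "0-1,3" */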
struct xen_domctl_scheduler_op *);
int (*adjust_global) (const struct scheduler *,
struct xen_sysctl_scheduler_op *);
+ void (*set_node_affinity) (const struct scheduler *,
+ struct domain *, nodemask_t *);
void (*dump_settings) (const struct scheduler *);
void (*dump_cpu_state) (const struct scheduler *, int);
/* Does this guest need iommu mappings? */
bool_t need_iommu;
#endif
+    /* Is node-affinity automatically computed? */
+ bool_t auto_node_affinity;
/* Is this guest fully privileged (aka dom0)? */
bool_t is_privileged;
/* Which guest this guest has privileges on */
/* Various mem_events */
struct mem_event_per_domain *mem_event;
- /* Currently computed from union of all vcpu cpu-affinity masks. */
+ /*
+     * Can be set explicitly (e.g., via XEN_DOMCTL_setnodeaffinity). If it is
+     * not, it is computed from the union of all the vcpus' cpu-affinity masks.
+ */
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}
+int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);
struct domain *domain_create(
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
+void sched_set_node_affinity(struct domain *, nodemask_t *);
int sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__UNPAUSE);
case XEN_DOMCTL_setvcpuaffinity:
- return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__SETVCPUAFFINITY);
+ case XEN_DOMCTL_setnodeaffinity:
+ return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__SETAFFINITY);
case XEN_DOMCTL_getvcpuaffinity:
- return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__GETVCPUAFFINITY);
+ case XEN_DOMCTL_getnodeaffinity:
+ return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__GETAFFINITY);
case XEN_DOMCTL_resumedomain:
return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__RESUME);
# XEN_DOMCTL_destroydomain
destroy
# XEN_DOMCTL_setvcpuaffinity
- setvcpuaffinity
+# XEN_DOMCTL_setnodeaffinity
+ setaffinity
# XEN_DOMCTL_getvcpuaffinity
- getvcpuaffinity
+# XEN_DOMCTL_getnodeaffinity
+ getaffinity
# XEN_DOMCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_getinfo
getscheduler
# XEN_DOMCTL_getdomaininfo, XEN_SYSCTL_getdomaininfolist