direct-io.hg

view xen/common/dom0_ops.c @ 7357:d6e99066959a

Refactor domain/vcpu allocation to be more separated.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Oct 12 17:01:38 2005 +0100 (2005-10-12)
parents 52b9aca1916a
children bd3268de4145
line source
1 /******************************************************************************
2 * dom0_ops.c
3 *
4 * Process command requests from domain-0 guest OS.
5 *
6 * Copyright (c) 2002, K A Fraser
7 */
9 #include <xen/config.h>
10 #include <xen/types.h>
11 #include <xen/lib.h>
12 #include <xen/mm.h>
13 #include <xen/sched.h>
14 #include <xen/domain.h>
15 #include <xen/event.h>
16 #include <xen/domain_page.h>
17 #include <xen/trace.h>
18 #include <xen/console.h>
19 #include <asm/current.h>
20 #include <public/dom0_ops.h>
21 #include <public/sched_ctl.h>
22 #include <acm/acm_hooks.h>
24 extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op);
25 extern void arch_getdomaininfo_ctxt(
26 struct vcpu *, struct vcpu_guest_context *);
28 static inline int is_free_domid(domid_t dom)
29 {
30 struct domain *d;
32 if ( dom >= DOMID_FIRST_RESERVED )
33 return 0;
35 if ( (d = find_domain_by_id(dom)) == NULL )
36 return 1;
38 put_domain(d);
39 return 0;
40 }
42 static void getdomaininfo(struct domain *d, dom0_getdomaininfo_t *info)
43 {
44 struct vcpu *v;
45 u64 cpu_time = 0;
46 int vcpu_count = 0;
47 int flags = DOMFLAGS_BLOCKED;
49 info->domain = d->domain_id;
51 memset(&info->vcpu_to_cpu, -1, sizeof(info->vcpu_to_cpu));
52 memset(&info->cpumap, 0, sizeof(info->cpumap));
54 /*
55 * - domain is marked as blocked only if all its vcpus are blocked
56 * - domain is marked as running if any of its vcpus is running
57 * - only map vcpus that aren't down. Note, at some point we may
58 * wish to demux the -1 value to indicate down vs. not-ever-booted
59 */
60 for_each_vcpu ( d, v ) {
61 /* only map vcpus that are up */
62 if ( !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
63 info->vcpu_to_cpu[v->vcpu_id] = v->processor;
64 info->cpumap[v->vcpu_id] = v->cpumap;
65 if ( !(v->vcpu_flags & VCPUF_blocked) )
66 flags &= ~DOMFLAGS_BLOCKED;
67 if ( v->vcpu_flags & VCPUF_running )
68 flags |= DOMFLAGS_RUNNING;
69 cpu_time += v->cpu_time;
70 vcpu_count++;
71 }
73 info->cpu_time = cpu_time;
74 info->n_vcpu = vcpu_count;
76 info->flags = flags |
77 ((d->domain_flags & DOMF_dying) ? DOMFLAGS_DYING : 0) |
78 ((d->domain_flags & DOMF_shutdown) ? DOMFLAGS_SHUTDOWN : 0) |
79 ((d->domain_flags & DOMF_ctrl_pause) ? DOMFLAGS_PAUSED : 0) |
80 d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT;
82 if (d->ssid != NULL)
83 info->ssidref = ((struct acm_ssid_domain *)d->ssid)->ssidref;
84 else
85 info->ssidref = ACM_DEFAULT_SSID;
87 info->tot_pages = d->tot_pages;
88 info->max_pages = d->max_pages;
89 info->shared_info_frame = __pa(d->shared_info) >> PAGE_SHIFT;
90 }
/*
 * Top-level dispatcher for privileged DOM0 control operations.
 *
 * Copies the request out of guest memory, checks caller privilege, the
 * control-interface version and the ACM security pre-hook, then runs the
 * requested operation with all dom0 operations fully serialized under a
 * single static lock.
 *
 * @u_dom0_op: guest-virtual pointer to the dom0_op_t request; results are
 *             copied back to the same buffer by operations that return data.
 * Returns 0 on success or a negative errno-style value on failure.
 */
long do_dom0_op(dom0_op_t *u_dom0_op)
{
    long ret = 0;
    dom0_op_t curop, *op = &curop;
    void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
    static spinlock_t dom0_lock = SPIN_LOCK_UNLOCKED;

    /* Only a privileged domain (dom0) may issue control operations. */
    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( copy_from_user(op, u_dom0_op, sizeof(*op)) )
        return -EFAULT;

    /* Reject tools built against a different control-interface version. */
    if ( op->interface_version != DOM0_INTERFACE_VERSION )
        return -EACCES;

    /* ACM security pre-hook; may stash state in ssid for the post hook. */
    if ( acm_pre_dom0_op(op, &ssid) )
        return -EACCES;

    spin_lock(&dom0_lock);

    switch ( op->cmd )
    {

    /* Push new guest context/info into an existing domain. */
    case DOM0_SETDOMAININFO:
    {
        struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = set_info_guest(d, &op->u.setdomaininfo);
            put_domain(d);
        }
    }
    break;

    /* Pause a domain; a domain may not pause itself via this path. */
    case DOM0_PAUSEDOMAIN:
    {
        struct domain *d = find_domain_by_id(op->u.pausedomain.domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_pause_by_systemcontroller(d);
                ret = 0;
            }
            put_domain(d);
        }
    }
    break;

    /* Unpause a domain; only valid once VCPU0 has been initialised. */
    case DOM0_UNPAUSEDOMAIN:
    {
        struct domain *d = find_domain_by_id(op->u.unpausedomain.domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( (d != current->domain) &&
                 test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
            {
                domain_unpause_by_systemcontroller(d);
                ret = 0;
            }
            put_domain(d);
        }
    }
    break;

    /* Create a new domain, allocating an ID if the caller did not pick one. */
    case DOM0_CREATEDOMAIN:
    {
        struct domain *d;
        unsigned int pro;
        domid_t dom;
        struct vcpu *v;
        unsigned int i, cnt[NR_CPUS] = { 0 };
        /* rover remembers the last auto-allocated ID across calls. */
        static domid_t rover = 0;

        dom = op->u.createdomain.domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            /* Caller requested a specific ID: it must be free. */
            ret = -EINVAL;
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            /* Scan the full ID space once, starting just past rover. */
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 0;
                if ( is_free_domid(dom) )
                    break;
            }

            ret = -ENOMEM;
            if ( dom == rover )
                break;

            rover = dom;
        }

        /* Do an initial CPU placement. Pick the least-populated CPU. */
        read_lock(&domlist_lock);
        for_each_domain ( d )
            for_each_vcpu ( d, v )
                cnt[v->processor]++;
        read_unlock(&domlist_lock);

        /*
         * If we're on a HT system, we only use the first HT for dom0, other
         * domains will all share the second HT of each CPU. Since dom0 is on
         * CPU 0, we favour high numbered CPUs in the event of a tie.
         */
        pro = smp_num_siblings - 1;
        for ( i = pro; i < num_online_cpus(); i += smp_num_siblings )
            if ( cnt[i] <= cnt[pro] )
                pro = i;

        ret = -ENOMEM;
        if ( (d = do_createdomain(dom, pro)) == NULL )
            break;

        ret = 0;

        /* Report the allocated ID back to the caller. */
        op->u.createdomain.domain = d->domain_id;
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;

    /* Grow a domain's VCPU count (shrinking is not supported). */
    case DOM0_MAX_VCPUS:
    {
        struct domain *d;
        unsigned int i, max = op->u.max_vcpus.max, cpu;

        ret = -EINVAL;
        if ( max > MAX_VIRT_CPUS )
            break;

        ret = -ESRCH;
        if ( (d = find_domain_by_id(op->u.max_vcpus.domain)) == NULL )
            break;

        /*
         * Can only create new VCPUs while the domain is not fully constructed
         * (and hence not runnable). Xen needs auditing for races before
         * removing this check.
         */
        ret = -EINVAL;
        if ( test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
            goto maxvcpu_out;

        /* We cannot reduce maximum VCPUs. */
        ret = -EINVAL;
        if ( (max != MAX_VIRT_CPUS) && (d->vcpu[max] != NULL) )
            goto maxvcpu_out;

        ret = -ENOMEM;
        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] == NULL )
            {
                /*
                 * NOTE(review): i-1 underflows if vcpu[0] were NULL; this
                 * relies on do_createdomain always allocating VCPU0 first
                 * (the _VCPUF_initialised test above already dereferenced
                 * d->vcpu[0]) — confirm that invariant holds.
                 */
                cpu = (d->vcpu[i-1]->processor + 1) % num_online_cpus();
                if ( alloc_vcpu(d, i, cpu) == NULL )
                    goto maxvcpu_out;
            }
        }

        ret = 0;

    maxvcpu_out:
        put_domain(d);
    }
    break;

    /* Kill a domain; a domain may not destroy itself via this path. */
    case DOM0_DESTROYDOMAIN:
    {
        struct domain *d = find_domain_by_id(op->u.destroydomain.domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_kill(d);
                ret = 0;
            }
            put_domain(d);
        }
    }
    break;

    /* Set (or clear) the CPU affinity of one VCPU. */
    case DOM0_PINCPUDOMAIN:
    {
        domid_t dom = op->u.pincpudomain.domain;
        struct domain *d = find_domain_by_id(dom);
        struct vcpu *v;
        cpumap_t cpumap;

        if ( d == NULL )
        {
            ret = -ESRCH;
            break;
        }

        if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
             !d->vcpu[op->u.pincpudomain.vcpu] )
        {
            ret = -EINVAL;
            put_domain(d);
            break;
        }

        v = d->vcpu[op->u.pincpudomain.vcpu];
        /* NOTE(review): dead check — the test above already rejected NULL. */
        if ( v == NULL )
        {
            ret = -ESRCH;
            put_domain(d);
            break;
        }

        /* Cannot re-pin the VCPU we are currently executing on. */
        if ( v == current )
        {
            ret = -EINVAL;
            put_domain(d);
            break;
        }

        /* The cpumap itself is fetched indirectly from guest memory. */
        if ( copy_from_user(&cpumap, op->u.pincpudomain.cpumap,
                            sizeof(cpumap)) )
        {
            ret = -EFAULT;
            put_domain(d);
            break;
        }

        /* update cpumap for this vcpu */
        v->cpumap = cpumap;

        if ( cpumap == CPUMAP_RUNANYWHERE )
        {
            clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
        }
        else
        {
            /* pick a new cpu from the usable map */
            int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();

            /* Pause around migration so the VCPU is not running meanwhile. */
            vcpu_pause(v);
            vcpu_migrate_cpu(v, new_cpu);
            set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
            vcpu_unpause(v);
        }

        put_domain(d);
    }
    break;

    /* Scheduler control: pass straight through to the scheduler. */
    case DOM0_SCHEDCTL:
    {
        ret = sched_ctl(&op->u.schedctl);
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;

    /* Per-domain scheduler parameter adjustment. */
    case DOM0_ADJUSTDOM:
    {
        ret = sched_adjdom(&op->u.adjustdom);
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;

    /* Return info for the first domain with ID >= the requested one. */
    case DOM0_GETDOMAININFO:
    {
        struct domain *d;

        read_lock(&domlist_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id >= op->u.getdomaininfo.domain )
                break;
        }

        /* Take a reference before dropping the list lock. */
        if ( (d == NULL) || !get_domain(d) )
        {
            read_unlock(&domlist_lock);
            ret = -ESRCH;
            break;
        }

        read_unlock(&domlist_lock);

        getdomaininfo(d, &op->u.getdomaininfo);

        if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
            ret = -EINVAL;

        put_domain(d);
    }
    break;

    /* Batch variant: copy info for up to max_domains domains to a buffer. */
    case DOM0_GETDOMAININFOLIST:
    {
        struct domain *d;
        dom0_getdomaininfo_t info;
        dom0_getdomaininfo_t *buffer = op->u.getdomaininfolist.buffer;
        u32 num_domains = 0;

        read_lock(&domlist_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id < op->u.getdomaininfolist.first_domain )
                continue;
            if ( num_domains == op->u.getdomaininfolist.max_domains )
                break;
            /* NOTE(review): d cannot be NULL inside for_each_domain;
             * only the !get_domain(d) half of this test is live. */
            if ( (d == NULL) || !get_domain(d) )
            {
                ret = -ESRCH;
                break;
            }

            getdomaininfo(d, &info);

            put_domain(d);

            if ( copy_to_user(buffer, &info, sizeof(dom0_getdomaininfo_t)) )
            {
                ret = -EINVAL;
                break;
            }

            buffer++;
            num_domains++;
        }

        read_unlock(&domlist_lock);

        if ( ret != 0 )
            break;

        op->u.getdomaininfolist.num_domains = num_domains;

        if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
            ret = -EINVAL;
    }
    break;

    /* Fetch the register context (and cpu_time) of one VCPU. */
    case DOM0_GETVCPUCONTEXT:
    {
        struct vcpu_guest_context *c;
        struct domain *d;
        struct vcpu *v;
        int i;

        d = find_domain_by_id(op->u.getvcpucontext.domain);
        if ( d == NULL )
        {
            ret = -ESRCH;
            break;
        }

        if ( op->u.getvcpucontext.vcpu >= MAX_VIRT_CPUS )
        {
            ret = -EINVAL;
            put_domain(d);
            break;
        }

        /* find first valid vcpu starting from request. */
        v = NULL;
        /*
         * NOTE(review): if the scan runs off the end, v is left as the last
         * slot's value, which may be a non-NULL but down VCPU; only NULL is
         * rejected below — confirm callers tolerate a down VCPU here.
         */
        for ( i = op->u.getvcpucontext.vcpu; i < MAX_VIRT_CPUS; i++ )
        {
            v = d->vcpu[i];
            if ( v != NULL && !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
                break;
        }

        if ( v == NULL )
        {
            ret = -ESRCH;
            put_domain(d);
            break;
        }

        op->u.getvcpucontext.cpu_time = v->cpu_time;

        /* A NULL ctxt pointer means the caller only wants cpu_time. */
        if ( op->u.getvcpucontext.ctxt != NULL )
        {
            if ( (c = xmalloc(struct vcpu_guest_context)) == NULL )
            {
                ret = -ENOMEM;
                put_domain(d);
                break;
            }

            /* Pause the target (unless it is us) for a stable snapshot. */
            if ( v != current )
                vcpu_pause(v);

            arch_getdomaininfo_ctxt(v,c);

            if ( v != current )
                vcpu_unpause(v);

            if ( copy_to_user(op->u.getvcpucontext.ctxt, c, sizeof(*c)) )
                ret = -EINVAL;

            xfree(c);
        }

        if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
            ret = -EINVAL;

        put_domain(d);
    }
    break;

    /* Set the wall-clock time as seen by all guests. */
    case DOM0_SETTIME:
    {
        do_settime(op->u.settime.secs,
                   op->u.settime.nsecs,
                   op->u.settime.system_time);
        ret = 0;
    }
    break;

#ifdef TRACE_BUFFER
    /* Trace-buffer control (only when built with TRACE_BUFFER). */
    case DOM0_TBUFCONTROL:
    {
        ret = tb_control(&op->u.tbufcontrol);
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;
#endif

    /* Read (and optionally clear) the Xen console ring. */
    case DOM0_READCONSOLE:
    {
        ret = read_console_ring(
            &op->u.readconsole.buffer,
            &op->u.readconsole.count,
            op->u.readconsole.clear);
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;

    /* Report which scheduler is active. */
    case DOM0_SCHED_ID:
    {
        op->u.sched_id.sched_id = sched_id();
        copy_to_user(u_dom0_op, op, sizeof(*op));
        ret = 0;
    }
    break;

    /* Set a domain's maximum memory reservation (input is in KiB). */
    case DOM0_SETDOMAINMAXMEM:
    {
        struct domain *d;
        ret = -ESRCH;
        d = find_domain_by_id(op->u.setdomainmaxmem.domain);
        if ( d != NULL )
        {
            d->max_pages = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10);
            put_domain(d);
            ret = 0;
        }
    }
    break;

#ifdef PERF_COUNTERS
    /* Performance-counter control (only when built with PERF_COUNTERS). */
    case DOM0_PERFCCONTROL:
    {
        extern int perfc_control(dom0_perfccontrol_t *);
        ret = perfc_control(&op->u.perfccontrol);
        copy_to_user(u_dom0_op, op, sizeof(*op));
    }
    break;
#endif

    /* Anything else is architecture-specific. */
    default:
        ret = arch_do_dom0_op(op,u_dom0_op);

    }

    spin_unlock(&dom0_lock);

    /* Inform the security module of the final outcome. */
    if (!ret)
        acm_post_dom0_op(op, ssid);
    else
        acm_fail_dom0_op(op, ssid);

    return ret;
}
588 /*
589 * Local variables:
590 * mode: C
591 * c-set-style: "BSD"
592 * c-basic-offset: 4
593 * tab-width: 4
594 * indent-tabs-mode: nil
595 * End:
596 */