xen/common/domctl.c @ 14196:9d36026b1b43 (ia64/xen-unstable)

xen: Cleanups and bug fixes after the rcu_lock_domain patch.
Signed-off-by: Keir Fraser <keir@xensource.com>
author: kfraser@localhost.localdomain
date:   Thu Mar 01 11:38:55 2007 +0000
/******************************************************************************
 * domctl.c
 *
 * Domain management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <asm/current.h>
#include <public/domctl.h>
#include <acm/acm_hooks.h>

extern long arch_do_domctl(
    struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
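
/*
 * Copy a hypervisor cpumask_t out to a guest-supplied xenctl_cpumap. Guest
 * bytes beyond the hypervisor's own bitmap are zero-filled; a NULL guest
 * handle is silently ignored.
 */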
void cpumask_to_xenctl_cpumap(
    struct xenctl_cpumap *xenctl_cpumap, cpumask_t *cpumask)
{
    unsigned int guest_bytes, copy_bytes, i;
    uint8_t zero = 0;
    uint8_t bytemap[(NR_CPUS + 7) / 8];

    if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
        return;

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, sizeof(bytemap));

    bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);

    copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);

    for ( i = copy_bytes; i < guest_bytes; i++ )
        copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
}
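
/*
 * Copy a guest-supplied xenctl_cpumap into a hypervisor cpumask_t. The mask
 * is cleared first, so a NULL guest handle yields an empty mask.
 */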
void xenctl_cpumap_to_cpumask(
    cpumask_t *cpumask, struct xenctl_cpumap *xenctl_cpumap)
{
    unsigned int guest_bytes, copy_bytes;
    uint8_t bytemap[(NR_CPUS + 7) / 8];

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, sizeof(bytemap));

    cpus_clear(*cpumask);

    if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
        return;

    copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);

    bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
}
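
/* A domid is free if it is below DOMID_FIRST_RESERVED and not in use. */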
static inline int is_free_domid(domid_t dom)
{
    struct domain *d;

    if ( dom >= DOMID_FIRST_RESERVED )
        return 0;

    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
        return 1;

    rcu_unlock_domain(d);
    return 0;
}
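
/*
 * Fill in a xen_domctl_getdomaininfo for domain @d: vcpu counts and
 * aggregate run time, blocked/running/dying/shutdown/paused flags, memory
 * usage, ACM ssidref and the domain handle.
 */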
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
    struct vcpu *v;
    u64 cpu_time = 0;
    int flags = XEN_DOMINF_blocked;
    struct vcpu_runstate_info runstate;

    info->domain = d->domain_id;
    info->nr_online_vcpus = 0;

    /*
     * - domain is marked as blocked only if all its vcpus are blocked
     * - domain is marked as running if any of its vcpus is running
     */
    for_each_vcpu ( d, v )
    {
        vcpu_runstate_get(v, &runstate);
        cpu_time += runstate.time[RUNSTATE_running];
        info->max_vcpu_id = v->vcpu_id;
        if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
        {
            if ( !(v->vcpu_flags & VCPUF_blocked) )
                flags &= ~XEN_DOMINF_blocked;
            if ( v->vcpu_flags & VCPUF_running )
                flags |= XEN_DOMINF_running;
            info->nr_online_vcpus++;
        }
    }

    info->cpu_time = cpu_time;

    info->flags = flags |
        ((d->domain_flags & DOMF_dying)      ? XEN_DOMINF_dying    : 0) |
        ((d->domain_flags & DOMF_shutdown)   ? XEN_DOMINF_shutdown : 0) |
        ((d->domain_flags & DOMF_ctrl_pause) ? XEN_DOMINF_paused   : 0) |
        d->shutdown_code << XEN_DOMINF_shutdownshift;

    if ( is_hvm_domain(d) )
        info->flags |= XEN_DOMINF_hvm_guest;

    if ( d->ssid != NULL )
        info->ssidref = ((struct acm_ssid_domain *)d->ssid)->ssidref;
    else
        info->ssidref = ACM_DEFAULT_SSID;

    info->tot_pages = d->tot_pages;
    info->max_pages = d->max_pages;
    info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);

    memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
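
/*
 * Choose a pCPU for a new domain's VCPU0: prefer the CPU currently hosting
 * the fewest online VCPUs, skipping the primary thread of each
 * hyperthreaded core.
 */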
static unsigned int default_vcpu0_location(void)
{
    struct domain *d;
    struct vcpu   *v;
    unsigned int   i, cpu, cnt[NR_CPUS] = { 0 };
    cpumask_t      cpu_exclude_map;

    /* Do an initial CPU placement. Pick the least-populated CPU. */
    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        for_each_vcpu ( d, v )
            if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
                cnt[v->processor]++;
    rcu_read_unlock(&domlist_read_lock);

    /*
     * If we're on a HT system, we only auto-allocate to a non-primary HT. We
     * favour high numbered CPUs in the event of a tie.
     */
    cpu = first_cpu(cpu_sibling_map[0]);
    if ( cpus_weight(cpu_sibling_map[0]) > 1 )
        cpu = next_cpu(cpu, cpu_sibling_map[0]);
    cpu_exclude_map = cpu_sibling_map[0];
    for_each_online_cpu ( i )
    {
        if ( cpu_isset(i, cpu_exclude_map) )
            continue;
        if ( (i == first_cpu(cpu_sibling_map[i])) &&
             (cpus_weight(cpu_sibling_map[i]) > 1) )
            continue;
        cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
        if ( cnt[i] <= cnt[cpu] )
            cpu = i;
    }

    return cpu;
}
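
/*
 * Top-level DOMCTL dispatcher. Callable only by a privileged domain; copies
 * the request from the guest, checks the interface version and ACM policy,
 * and runs the handler under the global domctl_lock.
 */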
long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
    long ret = 0;
    struct xen_domctl curop, *op = &curop;
    void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
    static DEFINE_SPINLOCK(domctl_lock);

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( copy_from_guest(op, u_domctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
        return -EACCES;

    if ( acm_pre_domctl(op, &ssid) )
        return -EPERM;

    spin_lock(&domctl_lock);

    switch ( op->cmd )
    {
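
    /*
     * A NULL ctxt handle resets the VCPU; otherwise the new context is
     * copied in (using the compat layout for 32-bit guests on 64-bit Xen)
     * and applied by arch_set_info_guest() while the domain is paused.
     */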
    case XEN_DOMCTL_setvcpucontext:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        vcpu_guest_context_u c = { .nat = NULL };
        unsigned int vcpu = op->u.vcpucontext.vcpu;
        struct vcpu *v;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = -EINVAL;
        if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
            goto svc_out;

        if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
        {
            ret = vcpu_reset(v);
            goto svc_out;
        }

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
            goto svc_out;

        if ( !IS_COMPAT(v->domain) )
            ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#ifdef CONFIG_COMPAT
        else
            ret = copy_from_guest(c.cmp,
                                  guest_handle_cast(op->u.vcpucontext.ctxt,
                                                    void), 1);
#endif
        ret = ret ? -EFAULT : 0;

        if ( ret == 0 )
        {
            domain_pause(d);
            ret = arch_set_info_guest(v, c);
            domain_unpause(d);
        }

    svc_out:
        xfree(c.nat);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_pausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_pause_by_systemcontroller(d);
                ret = 0;
            }
            rcu_unlock_domain(d);
        }
    }
    break;
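
    /*
     * Unpausing additionally requires VCPU0 to have been initialised, so
     * the tools cannot release a domain that is not yet fully constructed.
     */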
    case XEN_DOMCTL_unpausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( (d != current->domain) && (d->vcpu[0] != NULL) &&
                 test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
            {
                domain_unpause_by_systemcontroller(d);
                ret = 0;
            }
            rcu_unlock_domain(d);
        }
    }
    break;
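
    /* Resuming clears the shutdown flag and wakes every VCPU of the domain. */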
    case XEN_DOMCTL_resumedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        struct vcpu *v;

        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = 0;
            if ( test_and_clear_bit(_DOMF_shutdown, &d->domain_flags) )
                for_each_vcpu ( d, v )
                    vcpu_wake(v);
            rcu_unlock_domain(d);
        }
    }
    break;
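
    /*
     * An explicit domid below DOMID_FIRST_RESERVED is honoured if free;
     * otherwise a free domid is found by scanning forward (with wraparound)
     * from just past the previously allocated one.
     */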
    case XEN_DOMCTL_createdomain:
    {
        struct domain *d;
        domid_t        dom;
        static domid_t rover = 0;
        unsigned int domcr_flags;

        /* Do not return from inside the locked region: set ret and break. */
        ret = -EINVAL;
        if ( supervisor_mode_kernel ||
             (op->u.createdomain.flags & ~XEN_DOMCTL_CDF_hvm_guest) )
            break;

        dom = op->domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            ret = -EINVAL;
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 0;
                if ( is_free_domid(dom) )
                    break;
            }

            ret = -ENOMEM;
            if ( dom == rover )
                break;

            rover = dom;
        }

        domcr_flags = 0;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
            domcr_flags |= DOMCRF_hvm;

        ret = -ENOMEM;
        if ( (d = domain_create(dom, domcr_flags)) == NULL )
            break;

        ret = 0;

        memcpy(d->handle, op->u.createdomain.handle,
               sizeof(xen_domain_handle_t));

        op->domain = d->domain_id;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;
    }
    break;
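
    /*
     * The VCPU count can only grow. New VCPUs are placed round-robin across
     * online pCPUs, starting from default_vcpu0_location() for VCPU0.
     */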
    case XEN_DOMCTL_max_vcpus:
    {
        struct domain *d;
        unsigned int i, max = op->u.max_vcpus.max, cpu;

        ret = -EINVAL;
        if ( max > MAX_VIRT_CPUS )
            break;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        /* Needed, for example, to ensure writable p.t. state is synced. */
        domain_pause(d);

        /* We cannot reduce maximum VCPUs. */
        ret = -EINVAL;
        if ( (max != MAX_VIRT_CPUS) && (d->vcpu[max] != NULL) )
            goto maxvcpu_out;

        ret = -ENOMEM;
        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] != NULL )
                continue;

            cpu = (i == 0) ?
                default_vcpu0_location() :
                (d->vcpu[i-1]->processor + 1) % num_online_cpus();

            if ( alloc_vcpu(d, i, cpu) == NULL )
                goto maxvcpu_out;
        }

        ret = 0;

    maxvcpu_out:
        domain_unpause(d);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_destroydomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_kill(d);
                ret = 0;
            }
            rcu_unlock_domain(d);
        }
    }
    break;
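
    /* Set or query the pCPU affinity mask of a single VCPU. */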
    case XEN_DOMCTL_setvcpuaffinity:
    case XEN_DOMCTL_getvcpuaffinity:
    {
        domid_t dom = op->domain;
        struct domain *d = rcu_lock_domain_by_id(dom);
        struct vcpu *v;
        cpumask_t new_affinity;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.vcpuaffinity.vcpu >= MAX_VIRT_CPUS )
            goto vcpuaffinity_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpuaffinity.vcpu]) == NULL )
            goto vcpuaffinity_out;

        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
        {
            xenctl_cpumap_to_cpumask(
                &new_affinity, &op->u.vcpuaffinity.cpumap);
            ret = vcpu_set_affinity(v, &new_affinity);
        }
        else
        {
            cpumask_to_xenctl_cpumap(
                &op->u.vcpuaffinity.cpumap, &v->cpu_affinity);
            ret = 0;
        }

    vcpuaffinity_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_scheduler_op:
    {
        struct domain *d;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = sched_adjust(d, &op->u.scheduler_op);
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

        rcu_unlock_domain(d);
    }
    break;
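
    /*
     * Returns info for the first domain whose domid is >= op->domain,
     * allowing the tools to enumerate all domains by repeated calls.
     */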
    case XEN_DOMCTL_getdomaininfo:
    {
        struct domain *d;
        domid_t dom;

        dom = op->domain;
        if ( dom == DOMID_SELF )
            dom = current->domain->domain_id;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id >= dom )
                break;
        }

        if ( d == NULL )
        {
            rcu_read_unlock(&domlist_read_lock);
            ret = -ESRCH;
            break;
        }

        getdomaininfo(d, &op->u.getdomaininfo);

        op->domain = op->u.getdomaininfo.domain;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

        rcu_read_unlock(&domlist_read_lock);
    }
    break;
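
    /*
     * Snapshot a VCPU's register context, pausing the VCPU (unless it is
     * the caller) so a consistent state is copied out.
     */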
    case XEN_DOMCTL_getvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        struct domain       *d;
        struct vcpu         *v;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.vcpucontext.vcpu >= MAX_VIRT_CPUS )
            goto getvcpucontext_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
            goto getvcpucontext_out;

        ret = -ENODATA;
        if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
            goto getvcpucontext_out;

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
            goto getvcpucontext_out;

        if ( v != current )
            vcpu_pause(v);

        arch_get_info_guest(v, c);
        ret = 0;

        if ( v != current )
            vcpu_unpause(v);

        if ( !IS_COMPAT(v->domain) )
            ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#ifdef CONFIG_COMPAT
        else
            ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
                                                  void), c.cmp, 1);
#endif

        if ( copy_to_guest(u_domctl, op, 1) || ret )
            ret = -EFAULT;

    getvcpucontext_out:
        xfree(c.nat);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_getvcpuinfo:
    {
        struct domain *d;
        struct vcpu   *v;
        struct vcpu_runstate_info runstate;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.getvcpuinfo.vcpu >= MAX_VIRT_CPUS )
            goto getvcpuinfo_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
            goto getvcpuinfo_out;

        vcpu_runstate_get(v, &runstate);

        op->u.getvcpuinfo.online   = !test_bit(_VCPUF_down, &v->vcpu_flags);
        op->u.getvcpuinfo.blocked  = test_bit(_VCPUF_blocked, &v->vcpu_flags);
        op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
        op->u.getvcpuinfo.cpu      = v->processor;
        ret = 0;

        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

    getvcpuinfo_out:
        rcu_unlock_domain(d);
    }
    break;
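
    /*
     * The new maximum takes effect only if it is not below the domain's
     * current allocation; otherwise the call fails with -EINVAL.
     */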
    case XEN_DOMCTL_max_mem:
    {
        struct domain *d;
        unsigned long new_max;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = -EINVAL;
        new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT-10);

        spin_lock(&d->page_alloc_lock);
        if ( new_max >= d->tot_pages )
        {
            d->max_pages = new_max;
            ret = 0;
        }
        spin_unlock(&d->page_alloc_lock);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_setdomainhandle:
    {
        struct domain *d;
        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            memcpy(d->handle, op->u.setdomainhandle.handle,
                   sizeof(xen_domain_handle_t));
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    case XEN_DOMCTL_setdebugging:
    {
        struct domain *d;
        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            if ( op->u.setdebugging.enable )
                set_bit(_DOMF_debugging, &d->domain_flags);
            else
                clear_bit(_DOMF_debugging, &d->domain_flags);
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    case XEN_DOMCTL_irq_permission:
    {
        struct domain *d;
        unsigned int pirq = op->u.irq_permission.pirq;

        ret = -EINVAL;
        if ( pirq >= NR_IRQS )
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( op->u.irq_permission.allow_access )
            ret = irq_permit_access(d, pirq);
        else
            ret = irq_deny_access(d, pirq);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_iomem_permission:
    {
        struct domain *d;
        unsigned long mfn = op->u.iomem_permission.first_mfn;
        unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;

        ret = -EINVAL;
        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( op->u.iomem_permission.allow_access )
            ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
        else
            ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_settimeoffset:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    default:
        ret = arch_do_domctl(op, u_domctl);
        break;
    }

    spin_unlock(&domctl_lock);

    if ( ret == 0 )
        acm_post_domctl(op, &ssid);
    else
        acm_fail_domctl(op, &ssid);

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */