direct-io.hg

view xen/common/domctl.c @ 14350:f3f5f2756d75

x86: Add VGCF_online flag to vcpu_guest_context.
Change common Xen code to start all VCPUs (except idle ones)
offline. Change arch code to deal with this.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Mar 12 13:53:43 2007 +0000 (2007-03-12)
parents 1a01d8d9dbec
children 5c52e5ca8459
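
For context, the changeset description is terse: the new VGCF_online flag lets the context-loading path decide whether a vcpu comes up runnable, now that common code creates every non-idle vcpu offline. The arch-side handling amounts to something like the following (a minimal sketch of the arch_set_info_guest() change, not the exact hunk from this changeset):

    if ( c.nat->flags & VGCF_online )
        clear_bit(_VCPUF_down, &v->vcpu_flags);
    else
        set_bit(_VCPUF_down, &v->vcpu_flags);

Only a context load carrying VGCF_online (or an explicit VCPUOP_up) brings the vcpu online.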
line source
/******************************************************************************
 * domctl.c
 *
 * Domain management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <xen/shadow.h>
#include <asm/current.h>
#include <public/domctl.h>
#include <acm/acm_hooks.h>

extern long arch_do_domctl(
    struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);

void cpumask_to_xenctl_cpumap(
    struct xenctl_cpumap *xenctl_cpumap, cpumask_t *cpumask)
{
    unsigned int guest_bytes, copy_bytes, i;
    uint8_t zero = 0;
    uint8_t bytemap[(NR_CPUS + 7) / 8];

    if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
        return;

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes = min_t(unsigned int, guest_bytes, sizeof(bytemap));

    bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);

    copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);

    for ( i = copy_bytes; i < guest_bytes; i++ )
        copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
}

void xenctl_cpumap_to_cpumask(
    cpumask_t *cpumask, struct xenctl_cpumap *xenctl_cpumap)
{
    unsigned int guest_bytes, copy_bytes;
    uint8_t bytemap[(NR_CPUS + 7) / 8];

    guest_bytes = (xenctl_cpumap->nr_cpus + 7) / 8;
    copy_bytes = min_t(unsigned int, guest_bytes, sizeof(bytemap));

    cpus_clear(*cpumask);

    if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
        return;

    copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);

    bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
}
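
/*
 * Editorial note on the two converters above: bitmap_long_to_byte() and
 * bitmap_byte_to_long() fix the guest-visible layout so that CPU n is
 * bit (n & 7) of byte (n / 8), independent of the hypervisor's word
 * size.  For example, a cpumask with CPUs 0 and 9 set yields
 * bytemap[0] == 0x01 and bytemap[1] == 0x02.
 */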

static inline int is_free_domid(domid_t dom)
{
    struct domain *d;

    if ( dom >= DOMID_FIRST_RESERVED )
        return 0;

    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
        return 1;

    rcu_unlock_domain(d);
    return 0;
}

void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
    struct vcpu *v;
    u64 cpu_time = 0;
    int flags = XEN_DOMINF_blocked;
    struct vcpu_runstate_info runstate;

    info->domain = d->domain_id;
    info->nr_online_vcpus = 0;

    /*
     * - domain is marked as blocked only if all its vcpus are blocked
     * - domain is marked as running if any of its vcpus is running
     */
    for_each_vcpu ( d, v )
    {
        vcpu_runstate_get(v, &runstate);
        cpu_time += runstate.time[RUNSTATE_running];
        info->max_vcpu_id = v->vcpu_id;
        if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
        {
            if ( !(v->vcpu_flags & VCPUF_blocked) )
                flags &= ~XEN_DOMINF_blocked;
            if ( v->vcpu_flags & VCPUF_running )
                flags |= XEN_DOMINF_running;
            info->nr_online_vcpus++;
        }
    }

    info->cpu_time = cpu_time;

    info->flags = flags |
        ((d->domain_flags & DOMF_dying)      ? XEN_DOMINF_dying      : 0) |
        ((d->domain_flags & DOMF_shutdown)   ? XEN_DOMINF_shutdown   : 0) |
        ((d->domain_flags & DOMF_ctrl_pause) ? XEN_DOMINF_paused     : 0) |
        d->shutdown_code << XEN_DOMINF_shutdownshift;

    if ( is_hvm_domain(d) )
        info->flags |= XEN_DOMINF_hvm_guest;

    if ( d->ssid != NULL )
        info->ssidref = ((struct acm_ssid_domain *)d->ssid)->ssidref;
    else
        info->ssidref = ACM_DEFAULT_SSID;

    info->tot_pages = d->tot_pages;
    info->max_pages = d->max_pages;
    info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);

    memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
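
/*
 * Example (editorial): a consumer can unpack the shutdown reason that
 * getdomaininfo() packed into the flags word with something like
 *     reason = (info->flags >> XEN_DOMINF_shutdownshift) &
 *              XEN_DOMINF_shutdownmask;
 * assuming the XEN_DOMINF_shutdown* constants from public/domctl.h.
 */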

static unsigned int default_vcpu0_location(void)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int i, cpu, cnt[NR_CPUS] = { 0 };
    cpumask_t cpu_exclude_map;

    /* Do an initial CPU placement. Pick the least-populated CPU. */
    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        for_each_vcpu ( d, v )
            if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
                cnt[v->processor]++;
    rcu_read_unlock(&domlist_read_lock);

    /*
     * If we're on a HT system, we only auto-allocate to a non-primary HT. We
     * favour high numbered CPUs in the event of a tie.
     */
    cpu = first_cpu(cpu_sibling_map[0]);
    if ( cpus_weight(cpu_sibling_map[0]) > 1 )
        cpu = next_cpu(cpu, cpu_sibling_map[0]);
    cpu_exclude_map = cpu_sibling_map[0];
    for_each_online_cpu ( i )
    {
        if ( cpu_isset(i, cpu_exclude_map) )
            continue;
        if ( (i == first_cpu(cpu_sibling_map[i])) &&
             (cpus_weight(cpu_sibling_map[i]) > 1) )
            continue;
        cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
        if ( cnt[i] <= cnt[cpu] )
            cpu = i;
    }

    return cpu;
}
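
/*
 * Worked example (editorial): with hyperthread sibling pairs {0,1} and
 * {2,3}, the loop above starts from CPU 1 (the non-primary thread of
 * CPU 0's pair), skips CPU 2 as a primary thread, and on a tie in
 * cnt[] picks CPU 3 thanks to the '<=' comparison, matching the
 * "favour high numbered CPUs" comment.
 */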

long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
    long ret = 0;
    struct xen_domctl curop, *op = &curop;
    void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
    static DEFINE_SPINLOCK(domctl_lock);

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( copy_from_guest(op, u_domctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
        return -EACCES;

    if ( acm_pre_domctl(op, &ssid) )
        return -EPERM;

    spin_lock(&domctl_lock);

    switch ( op->cmd )
    {

    case XEN_DOMCTL_setvcpucontext:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        vcpu_guest_context_u c = { .nat = NULL };
        unsigned int vcpu = op->u.vcpucontext.vcpu;
        struct vcpu *v;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = -EINVAL;
        if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
            goto svc_out;

        if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
        {
            ret = vcpu_reset(v);
            goto svc_out;
        }

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
            goto svc_out;

        if ( !IS_COMPAT(v->domain) )
            ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#ifdef CONFIG_COMPAT
        else
            ret = copy_from_guest(c.cmp,
                                  guest_handle_cast(op->u.vcpucontext.ctxt,
                                                    void), 1);
#endif
        ret = ret ? -EFAULT : 0;

        if ( ret == 0 )
        {
            domain_pause(d);
            ret = arch_set_info_guest(v, c);
            domain_unpause(d);
        }

    svc_out:
        xfree(c.nat);
        rcu_unlock_domain(d);
    }
    break;
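
    /*
     * Note (editorial): vcpu_guest_context_u overlays the native and
     * 32-bit compat layouts, so the BUILD_BUG_ON in the case above is
     * what makes a single xmalloc(struct vcpu_guest_context) big
     * enough for whichever copy_from_guest() branch runs.
     */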

    case XEN_DOMCTL_pausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_pause_by_systemcontroller(d);
                ret = 0;
            }
            rcu_unlock_domain(d);
        }
    }
    break;

    case XEN_DOMCTL_unpausedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);

        ret = -ESRCH;
        if ( d == NULL )
            break;

        domain_unpause_by_systemcontroller(d);
        rcu_unlock_domain(d);
        ret = 0;
    }
    break;

    case XEN_DOMCTL_resumedomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        struct vcpu *v;

        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = 0;
            if ( test_and_clear_bit(_DOMF_shutdown, &d->domain_flags) )
                for_each_vcpu ( d, v )
                    vcpu_wake(v);
            rcu_unlock_domain(d);
        }
    }
    break;

    case XEN_DOMCTL_createdomain:
    {
        struct domain *d;
        domid_t dom;
        static domid_t rover = 0;
        unsigned int domcr_flags;

        ret = -EINVAL;
        if ( supervisor_mode_kernel ||
             (op->u.createdomain.flags & ~XEN_DOMCTL_CDF_hvm_guest) )
            break;

        dom = op->domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            ret = -EINVAL;
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 0;
                if ( is_free_domid(dom) )
                    break;
            }

            ret = -ENOMEM;
            if ( dom == rover )
                break;

            rover = dom;
        }

        domcr_flags = 0;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
            domcr_flags |= DOMCRF_hvm;

        ret = -ENOMEM;
        if ( (d = domain_create(dom, domcr_flags)) == NULL )
            break;

        ret = 0;

        memcpy(d->handle, op->u.createdomain.handle,
               sizeof(xen_domain_handle_t));

        op->domain = d->domain_id;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;
    }
    break;
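
    /*
     * Example (editorial): with rover == 5 and IDs 6 and 7 in use, the
     * scan above hands out 8; on reaching DOMID_FIRST_RESERVED it
     * wraps to 0 and keeps going, and only a full cycle back to rover
     * means the ID space is exhausted (-ENOMEM).  Domain 0 always
     * exists, so ID 0 is never handed out.
     */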

    case XEN_DOMCTL_max_vcpus:
    {
        struct domain *d;
        unsigned int i, max = op->u.max_vcpus.max, cpu;

        ret = -EINVAL;
        if ( max > MAX_VIRT_CPUS )
            break;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        /* Needed, for example, to ensure writable p.t. state is synced. */
        domain_pause(d);

        /* We cannot reduce maximum VCPUs. */
        ret = -EINVAL;
        if ( (max != MAX_VIRT_CPUS) && (d->vcpu[max] != NULL) )
            goto maxvcpu_out;

        ret = -ENOMEM;
        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] != NULL )
                continue;

            cpu = (i == 0) ?
                default_vcpu0_location() :
                (d->vcpu[i-1]->processor + 1) % num_online_cpus();

            if ( alloc_vcpu(d, i, cpu) == NULL )
                goto maxvcpu_out;
        }

        ret = 0;

    maxvcpu_out:
        domain_unpause(d);
        rcu_unlock_domain(d);
    }
    break;
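
    /*
     * Note (editorial): vcpu 0 is placed on the least-loaded CPU via
     * default_vcpu0_location(); each subsequent vcpu i simply takes
     * (d->vcpu[i-1]->processor + 1) % num_online_cpus(), i.e. a
     * round-robin stride from its predecessor rather than a fresh
     * load-balancing decision.
     */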

    case XEN_DOMCTL_destroydomain:
    {
        struct domain *d = rcu_lock_domain_by_id(op->domain);
        ret = -ESRCH;
        if ( d != NULL )
        {
            ret = -EINVAL;
            if ( d != current->domain )
            {
                domain_kill(d);
                ret = 0;
            }
            rcu_unlock_domain(d);
        }
    }
    break;

    case XEN_DOMCTL_setvcpuaffinity:
    case XEN_DOMCTL_getvcpuaffinity:
    {
        domid_t dom = op->domain;
        struct domain *d = rcu_lock_domain_by_id(dom);
        struct vcpu *v;
        cpumask_t new_affinity;

        ret = -ESRCH;
        if ( d == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.vcpuaffinity.vcpu >= MAX_VIRT_CPUS )
            goto vcpuaffinity_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpuaffinity.vcpu]) == NULL )
            goto vcpuaffinity_out;

        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
        {
            xenctl_cpumap_to_cpumask(
                &new_affinity, &op->u.vcpuaffinity.cpumap);
            ret = vcpu_set_affinity(v, &new_affinity);
        }
        else
        {
            cpumask_to_xenctl_cpumap(
                &op->u.vcpuaffinity.cpumap, &v->cpu_affinity);
            ret = 0;
        }

    vcpuaffinity_out:
        rcu_unlock_domain(d);
    }
    break;
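
    /*
     * Caller-side sketch (hypothetical, not part of this file): a
     * toolstack pinning vcpu 2 of domain 7 would fill in the domctl as
     *
     *     op.cmd = XEN_DOMCTL_setvcpuaffinity;
     *     op.domain = 7;
     *     op.u.vcpuaffinity.vcpu = 2;
     *     op.u.vcpuaffinity.cpumap.nr_cpus = nr_cpus;
     *     set_xen_guest_handle(op.u.vcpuaffinity.cpumap.bitmap, bytemap);
     *
     * before issuing the hypercall that lands here.
     */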

    case XEN_DOMCTL_scheduler_op:
    {
        struct domain *d;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = sched_adjust(d, &op->u.scheduler_op);
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_getdomaininfo:
    {
        struct domain *d;
        domid_t dom;

        dom = op->domain;
        if ( dom == DOMID_SELF )
            dom = current->domain->domain_id;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id >= dom )
                break;
        }

        if ( d == NULL )
        {
            rcu_read_unlock(&domlist_read_lock);
            ret = -ESRCH;
            break;
        }

        getdomaininfo(d, &op->u.getdomaininfo);

        op->domain = op->u.getdomaininfo.domain;
        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

        rcu_read_unlock(&domlist_read_lock);
    }
    break;

    case XEN_DOMCTL_getvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        struct domain *d;
        struct vcpu *v;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.vcpucontext.vcpu >= MAX_VIRT_CPUS )
            goto getvcpucontext_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
            goto getvcpucontext_out;

        ret = -ENODATA;
        if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
            goto getvcpucontext_out;

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
            goto getvcpucontext_out;

        if ( v != current )
            vcpu_pause(v);

        arch_get_info_guest(v, c);
        ret = 0;

        if ( v != current )
            vcpu_unpause(v);

        if ( !IS_COMPAT(v->domain) )
            ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#ifdef CONFIG_COMPAT
        else
            ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
                                                  void), c.cmp, 1);
#endif

        if ( copy_to_guest(u_domctl, op, 1) || ret )
            ret = -EFAULT;

    getvcpucontext_out:
        xfree(c.nat);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_getvcpuinfo:
    {
        struct domain *d;
        struct vcpu *v;
        struct vcpu_runstate_info runstate;

        ret = -ESRCH;
        if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
            break;

        ret = -EINVAL;
        if ( op->u.getvcpuinfo.vcpu >= MAX_VIRT_CPUS )
            goto getvcpuinfo_out;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
            goto getvcpuinfo_out;

        vcpu_runstate_get(v, &runstate);

        op->u.getvcpuinfo.online   = !test_bit(_VCPUF_down, &v->vcpu_flags);
        op->u.getvcpuinfo.blocked  = test_bit(_VCPUF_blocked, &v->vcpu_flags);
        op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
        op->u.getvcpuinfo.cpu      = v->processor;
        ret = 0;

        if ( copy_to_guest(u_domctl, op, 1) )
            ret = -EFAULT;

    getvcpuinfo_out:
        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_max_mem:
    {
        struct domain *d;
        unsigned long new_max;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        ret = -EINVAL;
        new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT-10);

        spin_lock(&d->page_alloc_lock);
        if ( new_max >= d->tot_pages )
        {
            ret = guest_physmap_max_mem_pages(d, new_max);
            if ( ret == 0 )
                d->max_pages = new_max;
        }
        spin_unlock(&d->page_alloc_lock);

        rcu_unlock_domain(d);
    }
    break;
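
    /*
     * Unit note (editorial): max_memkb >> (PAGE_SHIFT-10) converts KiB
     * to pages; with 4 KiB pages (PAGE_SHIFT == 12) that is a shift by
     * 2, so a 1048576 KiB (1 GiB) request becomes 262144 pages.
     */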

    case XEN_DOMCTL_setdomainhandle:
    {
        struct domain *d;
        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            memcpy(d->handle, op->u.setdomainhandle.handle,
                   sizeof(xen_domain_handle_t));
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    case XEN_DOMCTL_setdebugging:
    {
        struct domain *d;
        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            if ( op->u.setdebugging.enable )
                set_bit(_DOMF_debugging, &d->domain_flags);
            else
                clear_bit(_DOMF_debugging, &d->domain_flags);
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    case XEN_DOMCTL_irq_permission:
    {
        struct domain *d;
        unsigned int pirq = op->u.irq_permission.pirq;

        ret = -EINVAL;
        if ( pirq >= NR_IRQS )
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( op->u.irq_permission.allow_access )
            ret = irq_permit_access(d, pirq);
        else
            ret = irq_deny_access(d, pirq);

        rcu_unlock_domain(d);
    }
    break;

    case XEN_DOMCTL_iomem_permission:
    {
        struct domain *d;
        unsigned long mfn = op->u.iomem_permission.first_mfn;
        unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;

        ret = -EINVAL;
        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d == NULL )
            break;

        if ( op->u.iomem_permission.allow_access )
            ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
        else
            ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);

        rcu_unlock_domain(d);
    }
    break;
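
    /*
     * Note (editorial): the "(mfn + nr_mfns - 1) < mfn" test rejects
     * ranges whose end wraps past the top of the address space, e.g.
     * mfn == 0xfffffff0 with nr_mfns == 0x20 on a 32-bit build.
     */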

    case XEN_DOMCTL_settimeoffset:
    {
        struct domain *d;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(op->domain);
        if ( d != NULL )
        {
            d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
            rcu_unlock_domain(d);
            ret = 0;
        }
    }
    break;

    default:
        ret = arch_do_domctl(op, u_domctl);
        break;
    }

    spin_unlock(&domctl_lock);

    if ( ret == 0 )
        acm_post_domctl(op, &ssid);
    else
        acm_fail_domctl(op, &ssid);

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */