ia64/xen-unstable

xen/common/domain.c @ 17211:af33f2054f47

x86: Allow bitop functions to be applied only to fields of at least 4
bytes. Otherwise the 'longword' processor instructions used will
overlap with adjacent fields with unpredictable consequences.

This change requires some code fixup and just a few casts (mainly when
operating on guest-shared fields which cannot be changed, and which by
observation are clearly safe).

Based on ideas from Jan Beulich <jbeulich@novell.com>

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author:   Keir Fraser <keir.fraser@citrix.com>
date:     Sun Mar 16 14:11:34 2008 +0000 (2008-03-16)
parents:  21532468020b
children: b2a3fe7f5591
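
The rule described in the changeset message, that bitop helpers such as set_bit()/clear_bit() may only target fields of at least 4 bytes because the underlying longword instructions read and write a full 32-bit quantity, can be enforced with a compile-time size check. The sketch below is illustrative only: BITOP_FIELD_MIN_SIZE, bitop_size_check(), demo_set_bit() and struct demo are hypothetical names, not the macros this changeset actually introduces.

/* Illustrative sketch only -- not the actual Xen macros. */

#define BITOP_FIELD_MIN_SIZE 4

/* Fails to compile (negative array size) if the operand is narrower than 4 bytes. */
#define bitop_size_check(addr) \
    ((void)sizeof(char[1 - 2 * (sizeof(*(addr)) < BITOP_FIELD_MIN_SIZE)]))

/* A checked wrapper in this style rejects unsafe callers at build time. */
#define demo_set_bit(nr, addr) \
    do { bitop_size_check(addr); set_bit(nr, addr); } while (0)

struct demo {
    unsigned long flags;  /* >= 4 bytes: safe target for longword bitops */
    unsigned char small;  /* 1 byte: a longword bitop would also touch   */
    unsigned char other;  /*   the neighbouring bytes                    */
};

/*
 * demo_set_bit(0, &d->flags) compiles; demo_set_bit(0, &d->small) does not.
 * For guest-shared fields whose layout cannot change, the changeset instead
 * uses an explicit cast once the access is verified safe by inspection,
 * e.g. (field name hypothetical):
 *
 *     set_bit(nr, (unsigned long *)&shared->small_field);
 */

The full file at this changeset follows.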

/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/config.h>
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>

/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
static unsigned int opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);

enum cpufreq_controller cpufreq_controller;
static void __init setup_cpufreq_option(char *str)
{
    if ( !strcmp(str, "dom0-kernel") )
    {
        cpufreq_controller = FREQCTL_dom0_kernel;
        opt_dom0_vcpus_pin = 1;
    }
}
custom_param("cpufreq", setup_cpufreq_option);

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *dom0;

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

int current_domain_id(void)
{
    return current->domain->domain_id;
}

struct domain *alloc_domain(domid_t domid)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));
    d->domain_id = domid;

    if ( xsm_alloc_security_domain(d) != 0 )
    {
        free_domain(d);
        return NULL;
    }

    atomic_set(&d->refcnt, 1);
    spin_lock_init(&d->big_lock);
    spin_lock_init(&d->page_alloc_lock);
    spin_lock_init(&d->shutdown_lock);
    spin_lock_init(&d->hypercall_deadlock_mutex);
    INIT_LIST_HEAD(&d->page_list);
    INIT_LIST_HEAD(&d->xenpage_list);

    return d;
}

void free_domain(struct domain *d)
{
    xsm_free_security_domain(d);
    xfree(d);
}

static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}

struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( !is_idle_domain(d) )
    {
        set_bit(_VPF_down, &v->pause_flags);
        v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    return v;
}

struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;

    if ( (v = idle_vcpu[cpu_id]) != NULL )
        return v;

    d = (vcpu_id == 0) ?
        domain_create(IDLE_DOMAIN_ID, 0, 0) :
        idle_vcpu[cpu_id - vcpu_id]->domain;
    BUG_ON(d == NULL);

    v = alloc_vcpu(d, vcpu_id, cpu_id);
    idle_vcpu[cpu_id] = v;

    return v;
}
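
/*
 * Create and initialise a new domain.  init_status records which subsystems
 * (event channels, grant tables, arch state) have been brought up so that
 * the 'fail' path below tears down only what was actually created.
 */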
struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
    struct domain *d, **pd;
    enum { INIT_evtchn = 1, INIT_gnttab = 2, INIT_arch = 8 };
    int init_status = 0;

    if ( (d = alloc_domain(domid)) == NULL )
        return NULL;

    if ( domcr_flags & DOMCRF_hvm )
        d->is_hvm = 1;

    if ( (domid == 0) && opt_dom0_vcpus_pin )
        d->is_pinned = 1;

    rangeset_domain_initialise(d);

    if ( !is_idle_domain(d) )
    {
        if ( xsm_domain_create(d, ssidref) != 0 )
            goto fail;

        d->is_paused_by_controller = 1;
        atomic_inc(&d->pause_count);

        if ( evtchn_init(d) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( grant_table_create(d) != 0 )
            goto fail;
        init_status |= INIT_gnttab;
    }

    if ( arch_domain_create(d, domcr_flags) != 0 )
        goto fail;
    init_status |= INIT_arch;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail;

    if ( sched_init_domain(d) != 0 )
        goto fail;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail:
    d->is_dying = DOMDYING_dead;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
        evtchn_destroy(d);
    rangeset_domain_destroy(d);
    free_domain(d);
    return NULL;
}

struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}

struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
            return d;
    }

    rcu_read_unlock(&domlist_read_lock);

    return NULL;
}
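
/*
 * Advance a domain through DOMDYING_alive -> DOMDYING_dying -> DOMDYING_dead.
 * domain_relinquish_resources() may return -EAGAIN, in which case the
 * operation is expected to be retried until DOMDYING_dead is reached.
 */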
int domain_kill(struct domain *d)
{
    int rc = 0;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by domctl_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_pause(d);
        d->is_dying = DOMDYING_dying;
        evtchn_destroy(d);
        gnttab_release_mappings(d);
        /* fallthrough */
    case DOMDYING_dying:
        rc = domain_relinquish_resources(d);
        page_scrub_kick();
        if ( rc != 0 )
        {
            BUG_ON(rc != -EAGAIN);
            break;
        }
        d->is_dying = DOMDYING_dead;
        put_domain(d);
        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}

void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}

void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    /*
     * Flush multicall state before dying if a multicall is in progress.
     * This shouldn't be necessary, but some architectures are calling
     * domain_crash_synchronous() when they really shouldn't (i.e., from
     * within hypercall context).
     */
    if ( this_cpu(mc_state).flags != 0 )
    {
        dprintk(XENLOG_ERR,
                "FIXME: synchronous domain crash during a multicall!\n");
        this_cpu(mc_state).flags = 0;
    }

    for ( ; ; )
        do_softirq();
}
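
/*
 * Mark a domain as shutting down and pause its VCPUs.  VCPUs that requested
 * deferral (v->defer_shutdown) are skipped here and complete the shutdown
 * later via vcpu_check_shutdown().  Once every VCPU is paused for shutdown,
 * __domain_finalise_shutdown() notifies dom0 through VIRQ_DOM_EXC.
 */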
void domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

    if ( d->domain_id == 0 )
        dom0_shutdown(reason);

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return;
    }

    d->is_shutting_down = 1;
    d->shutdown_code = reason;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( v->defer_shutdown )
            continue;
        atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
    }

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}

int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

void domain_pause_for_debugger(void)
{
    struct domain *d = current->domain;
    struct vcpu *v;

    atomic_inc(&d->pause_count);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
    }

    rangeset_domain_destroy(d);

    grant_table_destroy(d);

    arch_domain_destroy(d);

    sched_destroy_domain(d);

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            free_vcpu_struct(v);

    if ( d->target )
        put_domain(d->target);

    free_domain(d);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}
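
/*
 * domain_destroy() runs once the reference count has dropped to zero: it
 * atomically moves the refcount from 0 to DOMAIN_DESTROYED (bailing out if
 * references remain or destruction already happened), unlinks the domain
 * from the list and hash, and defers the actual teardown to
 * complete_domain_destroy() via call_rcu() so that RCU readers never see a
 * freed structure.
 */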
/* Release resources belonging to task @p. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;
    atomic_t old, new;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    _atomic_set(old, 0);
    _atomic_set(new, DOMAIN_DESTROYED);
    old = atomic_compareandswap(old, new, &d->refcnt);
    if ( _atomic_read(old) != 0 )
        return;

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        vcpu_sleep_sync(v);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

void domain_pause_by_systemcontroller(struct domain *d)
{
    domain_pause(d);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

void domain_unpause_by_systemcontroller(struct domain *d)
{
    if ( test_and_clear_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
{
    struct vcpu *v = d->vcpu[vcpuid];

    BUG_ON(v->is_initialised);

    return arch_set_info_guest(v, ctxt);
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    domain_pause(d);
    LOCK_BIGLOCK(d);

    rc = arch_vcpu_reset(v);
    if ( rc != 0 )
        goto out;

    set_bit(_VPF_down, &v->pause_flags);

    v->fpu_initialised = 0;
    v->fpu_dirtied = 0;
    v->is_polling = 0;
    v->is_initialised = 0;
    v->nmi_pending = 0;
    v->nmi_masked = 0;
    clear_bit(_VPF_blocked, &v->pause_flags);

 out:
    UNLOCK_BIGLOCK(v->domain);
    domain_unpause(d);

    return rc;
}
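
/*
 * Handler for the vcpu_op hypercall: guest-requested VCPU bring-up and
 * take-down, runstate queries and per-VCPU timer control.  Unknown commands
 * are forwarded to arch_do_vcpu_op().
 */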
long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    long rc = 0;

    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
        return -EINVAL;

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;

        if ( copy_from_guest(ctxt, arg, 1) )
        {
            xfree(ctxt);
            return -EFAULT;
        }

        LOCK_BIGLOCK(d);
        rc = -EEXIST;
        if ( !v->is_initialised )
            rc = boot_vcpu(d, vcpuid, ctxt);
        UNLOCK_BIGLOCK(d);

        xfree(ctxt);
        break;

    case VCPUOP_up:
        if ( !v->is_initialised )
            return -EINVAL;

        if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
            vcpu_wake(v);

        break;

    case VCPUOP_down:
        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
            vcpu_sleep_nosync(v);
        break;

    case VCPUOP_is_up:
        rc = !test_bit(_VPF_down, &v->pause_flags);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_send_nmi:
        if ( !guest_handle_is_null(arg) )
            return -EINVAL;

        if ( !test_and_set_bool(v->nmi_pending) )
            vcpu_kick(v);

        break;

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
    if ( type > MAX_VMASST_TYPE )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */