ia64/xen-unstable

view xen/common/domain.c @ 17251:b2a3fe7f5591

domain_shutdown() needs to call vcpu_pause_nosync() rather than directly
incrementing the pause_count field. The former ensures that the VCPU
actually gets descheduled (synchronously, in the case of the
currently-running VCPU).

Based on a bug report and proposed patch by Ben Guthro and Robert
Phillips of Virtual Iron.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Mar 18 15:23:25 2008 +0000
parents af33f2054f47
children b667e220e556
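
For context, the essence of this change to domain_shutdown() is shown
diff-style below. The removed line is an assumption reconstructed from the
description above, not a quote of the parent revision af33f2054f47:

    for_each_vcpu ( d, v )
    {
        if ( v->defer_shutdown )
            continue;
-       atomic_inc(&v->pause_count);  /* old: bumps the pause count only */
+       vcpu_pause_nosync(v);         /* new: also calls vcpu_sleep_nosync() */
        v->paused_for_shutdown = 1;
    }

vcpu_pause_nosync(), defined later in this file, performs the same
atomic_inc() and additionally calls vcpu_sleep_nosync(), which is what gets
the paused VCPU descheduled.
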
line source
/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/config.h>
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>

/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
static unsigned int opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);

enum cpufreq_controller cpufreq_controller;
static void __init setup_cpufreq_option(char *str)
{
    if ( !strcmp(str, "dom0-kernel") )
    {
        cpufreq_controller = FREQCTL_dom0_kernel;
        opt_dom0_vcpus_pin = 1;
    }
}
custom_param("cpufreq", setup_cpufreq_option);

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *dom0;

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

int current_domain_id(void)
{
    return current->domain->domain_id;
}

struct domain *alloc_domain(domid_t domid)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));
    d->domain_id = domid;

    if ( xsm_alloc_security_domain(d) != 0 )
    {
        free_domain(d);
        return NULL;
    }

    atomic_set(&d->refcnt, 1);
    spin_lock_init(&d->big_lock);
    spin_lock_init(&d->page_alloc_lock);
    spin_lock_init(&d->shutdown_lock);
    spin_lock_init(&d->hypercall_deadlock_mutex);
    INIT_LIST_HEAD(&d->page_list);
    INIT_LIST_HEAD(&d->xenpage_list);

    return d;
}

void free_domain(struct domain *d)
{
    xsm_free_security_domain(d);
    xfree(d);
}

static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;
    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}

struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( !is_idle_domain(d) )
    {
        set_bit(_VPF_down, &v->pause_flags);
        v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    return v;
}

struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;

    if ( (v = idle_vcpu[cpu_id]) != NULL )
        return v;

    d = (vcpu_id == 0) ?
        domain_create(IDLE_DOMAIN_ID, 0, 0) :
        idle_vcpu[cpu_id - vcpu_id]->domain;
    BUG_ON(d == NULL);

    v = alloc_vcpu(d, vcpu_id, cpu_id);
    idle_vcpu[cpu_id] = v;

    return v;
}

struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
    struct domain *d, **pd;
    enum { INIT_evtchn = 1, INIT_gnttab = 2, INIT_arch = 8 };
    int init_status = 0;

    if ( (d = alloc_domain(domid)) == NULL )
        return NULL;

    if ( domcr_flags & DOMCRF_hvm )
        d->is_hvm = 1;

    if ( (domid == 0) && opt_dom0_vcpus_pin )
        d->is_pinned = 1;

    rangeset_domain_initialise(d);

    if ( !is_idle_domain(d) )
    {
        if ( xsm_domain_create(d, ssidref) != 0 )
            goto fail;

        d->is_paused_by_controller = 1;
        atomic_inc(&d->pause_count);

        if ( evtchn_init(d) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( grant_table_create(d) != 0 )
            goto fail;
        init_status |= INIT_gnttab;
    }

    if ( arch_domain_create(d, domcr_flags) != 0 )
        goto fail;
    init_status |= INIT_arch;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail;

    if ( sched_init_domain(d) != 0 )
        goto fail;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail:
    d->is_dying = DOMDYING_dead;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
        evtchn_destroy(d);
    rangeset_domain_destroy(d);
    free_domain(d);
    return NULL;
}

struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}


struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
            return d;
    }

    rcu_read_unlock(&domlist_read_lock);

    return NULL;
}


int domain_kill(struct domain *d)
{
    int rc = 0;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by domctl_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_pause(d);
        d->is_dying = DOMDYING_dying;
        evtchn_destroy(d);
        gnttab_release_mappings(d);
        /* fallthrough */
    case DOMDYING_dying:
        rc = domain_relinquish_resources(d);
        page_scrub_kick();
        if ( rc != 0 )
        {
            BUG_ON(rc != -EAGAIN);
            break;
        }
        d->is_dying = DOMDYING_dead;
        put_domain(d);
        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}


void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}


void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    /*
     * Flush multicall state before dying if a multicall is in progress.
     * This shouldn't be necessary, but some architectures are calling
     * domain_crash_synchronous() when they really shouldn't (i.e., from
     * within hypercall context).
     */
    if ( this_cpu(mc_state).flags != 0 )
    {
        dprintk(XENLOG_ERR,
                "FIXME: synchronous domain crash during a multicall!\n");
        this_cpu(mc_state).flags = 0;
    }

    for ( ; ; )
        do_softirq();
}


void domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

    if ( d->domain_id == 0 )
        dom0_shutdown(reason);

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return;
    }

    d->is_shutting_down = 1;
    d->shutdown_code = reason;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( v->defer_shutdown )
            continue;
        vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
    }

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}

int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

void domain_pause_for_debugger(void)
{
    struct domain *d = current->domain;
    struct vcpu *v;

    atomic_inc(&d->pause_count);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
    }

    rangeset_domain_destroy(d);

    grant_table_destroy(d);

    arch_domain_destroy(d);

    sched_destroy_domain(d);

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            free_vcpu_struct(v);

    if (d->target)
        put_domain(d->target);

    free_domain(d);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

/* Release resources belonging to task @p. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;
    atomic_t old, new;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    _atomic_set(old, 0);
    _atomic_set(new, DOMAIN_DESTROYED);
    old = atomic_compareandswap(old, new, &d->refcnt);
    if ( _atomic_read(old) != 0 )
        return;

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        vcpu_sleep_sync(v);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

void domain_pause_by_systemcontroller(struct domain *d)
{
    domain_pause(d);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

void domain_unpause_by_systemcontroller(struct domain *d)
{
    if ( test_and_clear_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
{
    struct vcpu *v = d->vcpu[vcpuid];

    BUG_ON(v->is_initialised);

    return arch_set_info_guest(v, ctxt);
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    domain_pause(d);
    LOCK_BIGLOCK(d);

    rc = arch_vcpu_reset(v);
    if ( rc != 0 )
        goto out;

    set_bit(_VPF_down, &v->pause_flags);

    v->fpu_initialised = 0;
    v->fpu_dirtied = 0;
    v->is_polling = 0;
    v->is_initialised = 0;
    v->nmi_pending = 0;
    v->nmi_masked = 0;
    clear_bit(_VPF_blocked, &v->pause_flags);

 out:
    UNLOCK_BIGLOCK(v->domain);
    domain_unpause(d);

    return rc;
}


long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    long rc = 0;

    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
        return -EINVAL;

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;

        if ( copy_from_guest(ctxt, arg, 1) )
        {
            xfree(ctxt);
            return -EFAULT;
        }

        LOCK_BIGLOCK(d);
        rc = -EEXIST;
        if ( !v->is_initialised )
            rc = boot_vcpu(d, vcpuid, ctxt);
        UNLOCK_BIGLOCK(d);

        xfree(ctxt);
        break;

    case VCPUOP_up:
        if ( !v->is_initialised )
            return -EINVAL;

        if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
            vcpu_wake(v);

        break;

    case VCPUOP_down:
        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
            vcpu_sleep_nosync(v);
        break;

    case VCPUOP_is_up:
        rc = !test_bit(_VPF_down, &v->pause_flags);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_send_nmi:
        if ( !guest_handle_is_null(arg) )
            return -EINVAL;

        if ( !test_and_set_bool(v->nmi_pending) )
            vcpu_kick(v);

        break;

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
    if ( type > MAX_VMASST_TYPE )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */