ia64/xen-unstable

xen/common/domain.c @ 15828:3b50a7e52ff2

Implement x86 continuable domain destroy.
This patch addresses the following bug report:
http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=1037
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author kfraser@localhost.localdomain
date Fri Aug 31 17:00:11 2007 +0100 (2007-08-31)
parents bd59dd48e208
children 6903e3f3bdd6
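
The behavioural change is in domain_kill() below: relinquishing a large domain's resources can now bail out part-way with -EAGAIN and be resumed on a later call, instead of holding a CPU for the whole teardown. As an illustration only (this sketch is not part of domain.c; in practice the retry would be driven by the caller of the destroy operation, e.g. the domctl path), a caller could look roughly like this:

/* Hypothetical helper, for illustration only: keep invoking domain_kill()
 * until it stops asking to be continued. A real caller would return on
 * -EAGAIN and retry later rather than spinning inside the hypervisor. */
static int destroy_domain_sketch(struct domain *d)
{
    int rc;

    do {
        rc = domain_kill(d);
    } while ( rc == -EAGAIN );

    return rc;
}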
/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/config.h>
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *dom0;

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

int current_domain_id(void)
{
    return current->domain->domain_id;
}

struct domain *alloc_domain(domid_t domid)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));
    d->domain_id = domid;

    if ( xsm_alloc_security_domain(d) != 0 )
    {
        free_domain(d);
        return NULL;
    }

    atomic_set(&d->refcnt, 1);
    spin_lock_init(&d->big_lock);
    spin_lock_init(&d->page_alloc_lock);
    spin_lock_init(&d->shutdown_lock);
    INIT_LIST_HEAD(&d->page_list);
    INIT_LIST_HEAD(&d->xenpage_list);

    return d;
}

void free_domain(struct domain *d)
{
    xsm_free_security_domain(d);
    xfree(d);
}
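
/*
 * Shutdown handling: a domain is "shutting down" once domain_shutdown()
 * has run, and fully "shut down" only when every one of its vcpus has been
 * paused for shutdown (either directly by domain_shutdown(), or later via
 * vcpu_check_shutdown() when a deferral ends). At that point
 * __domain_finalise_shutdown() marks the domain shut down and notifies
 * dom0 via VIRQ_DOM_EXC.
 */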
static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}
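
/*
 * Allocate and initialise vcpu 'vcpu_id' of domain 'd', to run on physical
 * CPU 'cpu_id'. The new vcpu is linked into d->vcpu[] and onto the
 * next_in_list chain before vcpu_check_shutdown() runs, so a concurrent
 * domain_shutdown() cannot miss it.
 */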
struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( !is_idle_domain(d) )
    {
        set_bit(_VPF_down, &v->pause_flags);
        v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    return v;
}

struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;

    if ( (v = idle_vcpu[cpu_id]) != NULL )
        return v;

    d = (vcpu_id == 0) ?
        domain_create(IDLE_DOMAIN_ID, 0, 0) :
        idle_vcpu[cpu_id - vcpu_id]->domain;
    BUG_ON(d == NULL);

    v = alloc_vcpu(d, vcpu_id, cpu_id);
    idle_vcpu[cpu_id] = v;

    return v;
}
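
/*
 * Create a new domain. init_status tracks which subsystems have been set
 * up so that the failure path below tears down exactly what was
 * initialised, in reverse order.
 */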
struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
    struct domain *d, **pd;
    enum { INIT_evtchn = 1, INIT_gnttab = 2, INIT_arch = 8 };
    int init_status = 0;

    if ( (d = alloc_domain(domid)) == NULL )
        return NULL;

    if ( domcr_flags & DOMCRF_hvm )
        d->is_hvm = 1;

    rangeset_domain_initialise(d);

    if ( !is_idle_domain(d) )
    {
        if ( xsm_domain_create(d, ssidref) != 0 )
            goto fail;

        d->is_paused_by_controller = 1;
        atomic_inc(&d->pause_count);

        if ( evtchn_init(d) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( grant_table_create(d) != 0 )
            goto fail;
        init_status |= INIT_gnttab;
    }

    if ( arch_domain_create(d) != 0 )
        goto fail;
    init_status |= INIT_arch;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail;

    if ( sched_init_domain(d) != 0 )
        goto fail;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail:
    d->is_dying = DOMDYING_dead;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
        evtchn_destroy(d);
    rangeset_domain_destroy(d);
    free_domain(d);
    return NULL;
}
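
/*
 * Domain lookup by id. get_domain_by_id() takes a reference on the domain
 * (drop it with put_domain()); rcu_lock_domain_by_id() instead returns
 * with the RCU read lock still held, which the caller must drop when done.
 */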
struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}


struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
            return d;
    }

    rcu_read_unlock(&domlist_read_lock);

    return NULL;
}
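
/*
 * Kill a domain. Destruction is continuable: while resources are still
 * being relinquished this returns -EAGAIN and leaves the domain in state
 * DOMDYING_dying, so the caller can retry later instead of holding the
 * CPU for the entire teardown. The is_dying state machine
 * (alive -> dying -> dead) is protected by domctl_lock.
 */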
int domain_kill(struct domain *d)
{
    int rc = 0;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by domctl_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_pause(d);
        d->is_dying = DOMDYING_dying;
        evtchn_destroy(d);
        gnttab_release_mappings(d);
        /* fallthrough */
    case DOMDYING_dying:
        rc = domain_relinquish_resources(d);
        page_scrub_kick();
        if ( rc != 0 )
        {
            BUG_ON(rc != -EAGAIN);
            break;
        }
        d->is_dying = DOMDYING_dead;
        put_domain(d);
        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}


void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}


void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    /*
     * Flush multicall state before dying if a multicall is in progress.
     * This shouldn't be necessary, but some architectures are calling
     * domain_crash_synchronous() when they really shouldn't (i.e., from
     * within hypercall context).
     */
    if ( this_cpu(mc_state).flags != 0 )
    {
        dprintk(XENLOG_ERR,
                "FIXME: synchronous domain crash during a multicall!\n");
        this_cpu(mc_state).flags = 0;
    }

    for ( ; ; )
        do_softirq();
}
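
/*
 * Request a shutdown of domain 'd' with the given reason code. Every vcpu
 * that has not asked to defer shutdown is paused immediately; deferring
 * vcpus are picked up later by vcpu_check_shutdown(). The smp_mb() below
 * pairs with the one in vcpu_start_shutdown_deferral().
 */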
void domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

    if ( d->domain_id == 0 )
        dom0_shutdown(reason);

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return;
    }

    d->is_shutting_down = 1;
    d->shutdown_code = reason;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( v->defer_shutdown )
            continue;
        atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
    }

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}
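
/*
 * A vcpu may ask to defer being paused for shutdown.
 * vcpu_start_shutdown_deferral() returns zero if the deferral lost the
 * race with an in-progress shutdown, in which case the vcpu has already
 * been marked paused_for_shutdown by vcpu_check_shutdown().
 */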
int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

void domain_pause_for_debugger(void)
{
    struct domain *d = current->domain;
    struct vcpu *v;

    atomic_inc(&d->pause_count);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
    }

    rangeset_domain_destroy(d);

    grant_table_destroy(d);

    arch_domain_destroy(d);

    sched_destroy_domain(d);

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            free_vcpu_struct(v);

    free_domain(d);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

/* Release resources belonging to task @p. */
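/*
 * Only one caller can win the compare-and-swap of refcnt from 0 to
 * DOMAIN_DESTROYED below; that caller unlinks the domain from domain_list
 * and domain_hash and schedules the real teardown via call_rcu(), so that
 * concurrent RCU readers can drain before the structures are freed.
 */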
void domain_destroy(struct domain *d)
{
    struct domain **pd;
    atomic_t old, new;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    _atomic_set(old, 0);
    _atomic_set(new, DOMAIN_DESTROYED);
    old = atomic_compareandswap(old, new, &d->refcnt);
    if ( _atomic_read(old) != 0 )
        return;

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}
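
/*
 * vcpu/domain pausing uses a simple counter: each pause increments
 * pause_count and each unpause decrements it, with wakeup happening only
 * when the count returns to zero. The *_by_systemcontroller variants use
 * the is_paused_by_controller flag so the controller holds at most one
 * such pause reference, however many requests it issues.
 */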
void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        vcpu_sleep_sync(v);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

void domain_pause_by_systemcontroller(struct domain *d)
{
    domain_pause(d);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

void domain_unpause_by_systemcontroller(struct domain *d)
{
    if ( test_and_clear_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
{
    struct vcpu *v = d->vcpu[vcpuid];

    BUG_ON(v->is_initialised);

    return arch_set_info_guest(v, ctxt);
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    domain_pause(d);
    LOCK_BIGLOCK(d);

    rc = arch_vcpu_reset(v);
    if ( rc != 0 )
        goto out;

    set_bit(_VPF_down, &v->pause_flags);

    v->fpu_initialised = 0;
    v->fpu_dirtied = 0;
    v->is_polling = 0;
    v->is_initialised = 0;
    v->nmi_pending = 0;
    v->nmi_masked = 0;
    clear_bit(_VPF_blocked, &v->pause_flags);

 out:
    UNLOCK_BIGLOCK(v->domain);
    domain_unpause(d);

    return rc;
}
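
/*
 * VCPUOP_* hypercall dispatcher. 'vcpuid' must name an already-allocated
 * vcpu of the calling domain; unknown commands fall through to the
 * architecture-specific arch_do_vcpu_op().
 */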
long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    long rc = 0;

    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
        return -EINVAL;

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;

        if ( copy_from_guest(ctxt, arg, 1) )
        {
            xfree(ctxt);
            return -EFAULT;
        }

        LOCK_BIGLOCK(d);
        rc = -EEXIST;
        if ( !v->is_initialised )
            rc = boot_vcpu(d, vcpuid, ctxt);
        UNLOCK_BIGLOCK(d);

        xfree(ctxt);
        break;

    case VCPUOP_up:
        if ( !v->is_initialised )
            return -EINVAL;

        if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
            vcpu_wake(v);

        break;

    case VCPUOP_down:
        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
            vcpu_sleep_nosync(v);
        break;

    case VCPUOP_is_up:
        rc = !test_bit(_VPF_down, &v->pause_flags);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
    {
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;
    }

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
    {
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);
        break;
    }

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
    if ( type > MAX_VMASST_TYPE )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */