ia64/xen-unstable

view xen/common/domain.c @ 17062:0769835cf50f

x86 shadow: Reduce scope of shadow lock.

emulate_map_dest does not need to hold the shadow lock: the only
shadow-related operation it may perform is removing a shadow, which is
infrequent and can acquire the lock internally. Everything else is either
a guest page-table walk or per-vcpu monitor table manipulation.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 14 10:33:12 2008 +0000 (2008-02-14)
parents 98c2665056ea
children 21532468020b
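
The change described above follows a common locking pattern: rather than holding a lock across an entire operation, the lock is taken only inside the rare branch that actually needs it. Below is a minimal, self-contained sketch of that pattern; it uses pthreads rather than Xen's shadow lock, and the names (map_dest, remove_shadow, state_lock) are illustrative only, not identifiers from the Xen source.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

/* Infrequent operation: takes the lock internally, as shadow removal does. */
static void remove_shadow(int gfn)
{
    pthread_mutex_lock(&state_lock);
    printf("removing shadow of gfn %d\n", gfn);
    pthread_mutex_unlock(&state_lock);
}

/* Common path: runs lock-free; only the rare branch synchronises.
 * Previously the whole function would have run under the lock. */
static void map_dest(int gfn, int must_remove)
{
    /* ... guest table walk / per-vcpu monitor table work, lock-free ... */
    if ( must_remove )
        remove_shadow(gfn);
}

int main(void)
{
    map_dest(42, 0);  /* common path: no locking at all */
    map_dest(43, 1);  /* rare path: lock taken only inside remove_shadow() */
    return 0;
}

The trade-off is the usual one: the common path avoids lock contention, while correctness relies on the lock-free walk not depending on state that the locked path can change underneath it.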
line source
/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/config.h>
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *dom0;

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

int current_domain_id(void)
{
    return current->domain->domain_id;
}

struct domain *alloc_domain(domid_t domid)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));
    d->domain_id = domid;

    if ( xsm_alloc_security_domain(d) != 0 )
    {
        free_domain(d);
        return NULL;
    }

    atomic_set(&d->refcnt, 1);
    spin_lock_init(&d->big_lock);
    spin_lock_init(&d->page_alloc_lock);
    spin_lock_init(&d->shutdown_lock);
    spin_lock_init(&d->hypercall_deadlock_mutex);
    INIT_LIST_HEAD(&d->page_list);
    INIT_LIST_HEAD(&d->xenpage_list);

    return d;
}

void free_domain(struct domain *d)
{
    xsm_free_security_domain(d);
    xfree(d);
}
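
/*
 * Mark the domain fully shut down once every vcpu has been paused for
 * shutdown, put all vcpus to sleep, and notify the tools via VIRQ_DOM_EXC.
 */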
static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}
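
/*
 * If the domain is shutting down, pause this vcpu for shutdown (cancelling
 * any deferral) and check whether it was the last vcpu needed to complete
 * the shutdown.
 */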
static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}

struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( !is_idle_domain(d) )
    {
        set_bit(_VPF_down, &v->pause_flags);
        v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    return v;
}

struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;

    if ( (v = idle_vcpu[cpu_id]) != NULL )
        return v;

    d = (vcpu_id == 0) ?
        domain_create(IDLE_DOMAIN_ID, 0, 0) :
        idle_vcpu[cpu_id - vcpu_id]->domain;
    BUG_ON(d == NULL);

    v = alloc_vcpu(d, vcpu_id, cpu_id);
    idle_vcpu[cpu_id] = v;

    return v;
}

struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
    struct domain *d, **pd;
    enum { INIT_evtchn = 1, INIT_gnttab = 2, INIT_arch = 8 };
    int init_status = 0;

    if ( (d = alloc_domain(domid)) == NULL )
        return NULL;

    if ( domcr_flags & DOMCRF_hvm )
        d->is_hvm = 1;

    rangeset_domain_initialise(d);

    if ( !is_idle_domain(d) )
    {
        if ( xsm_domain_create(d, ssidref) != 0 )
            goto fail;

        d->is_paused_by_controller = 1;
        atomic_inc(&d->pause_count);

        if ( evtchn_init(d) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( grant_table_create(d) != 0 )
            goto fail;
        init_status |= INIT_gnttab;
    }

    if ( arch_domain_create(d, domcr_flags) != 0 )
        goto fail;
    init_status |= INIT_arch;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail;

    if ( sched_init_domain(d) != 0 )
        goto fail;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail:
    d->is_dying = DOMDYING_dead;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
        evtchn_destroy(d);
    rangeset_domain_destroy(d);
    free_domain(d);
    return NULL;
}

struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}

struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
            return d;
    }

    rcu_read_unlock(&domlist_read_lock);

    return NULL;
}
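
/*
 * Tear down a domain in stages. Returns -EAGAIN if relinquishing the
 * domain's resources was preempted and the operation must be retried.
 */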
int domain_kill(struct domain *d)
{
    int rc = 0;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by domctl_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_pause(d);
        d->is_dying = DOMDYING_dying;
        evtchn_destroy(d);
        gnttab_release_mappings(d);
        /* fallthrough */
    case DOMDYING_dying:
        rc = domain_relinquish_resources(d);
        page_scrub_kick();
        if ( rc != 0 )
        {
            BUG_ON(rc != -EAGAIN);
            break;
        }
        d->is_dying = DOMDYING_dead;
        put_domain(d);
        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}

void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}

void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    /*
     * Flush multicall state before dying if a multicall is in progress.
     * This shouldn't be necessary, but some architectures are calling
     * domain_crash_synchronous() when they really shouldn't (i.e., from
     * within hypercall context).
     */
    if ( this_cpu(mc_state).flags != 0 )
    {
        dprintk(XENLOG_ERR,
                "FIXME: synchronous domain crash during a multicall!\n");
        this_cpu(mc_state).flags = 0;
    }

    for ( ; ; )
        do_softirq();
}

void domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

    if ( d->domain_id == 0 )
        dom0_shutdown(reason);

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return;
    }

    d->is_shutting_down = 1;
    d->shutdown_code = reason;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( v->defer_shutdown )
            continue;
        atomic_inc(&v->pause_count);
        v->paused_for_shutdown = 1;
    }

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}
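
/*
 * Shutdown deferral: a vcpu may ask to defer being paused for shutdown and
 * later end the deferral. vcpu_start_shutdown_deferral() returns nonzero if
 * the deferral is in effect, or zero if the domain is already shutting down
 * and the vcpu has been paused instead.
 */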
int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

void domain_pause_for_debugger(void)
{
    struct domain *d = current->domain;
    struct vcpu *v;

    atomic_inc(&d->pause_count);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
    }

    rangeset_domain_destroy(d);

    grant_table_destroy(d);

    arch_domain_destroy(d);

    sched_destroy_domain(d);

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            free_vcpu_struct(v);

    if (d->target)
        put_domain(d->target);

    free_domain(d);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

/* Release resources belonging to task @p. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;
    atomic_t old, new;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    _atomic_set(old, 0);
    _atomic_set(new, DOMAIN_DESTROYED);
    old = atomic_compareandswap(old, new, &d->refcnt);
    if ( _atomic_read(old) != 0 )
        return;

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        vcpu_sleep_sync(v);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

void domain_pause_by_systemcontroller(struct domain *d)
{
    domain_pause(d);
    if ( test_and_set_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

void domain_unpause_by_systemcontroller(struct domain *d)
{
    if ( test_and_clear_bool(d->is_paused_by_controller) )
        domain_unpause(d);
}

int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
{
    struct vcpu *v = d->vcpu[vcpuid];

    BUG_ON(v->is_initialised);

    return arch_set_info_guest(v, ctxt);
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    domain_pause(d);
    LOCK_BIGLOCK(d);

    rc = arch_vcpu_reset(v);
    if ( rc != 0 )
        goto out;

    set_bit(_VPF_down, &v->pause_flags);

    v->fpu_initialised = 0;
    v->fpu_dirtied = 0;
    v->is_polling = 0;
    v->is_initialised = 0;
    v->nmi_pending = 0;
    v->nmi_masked = 0;
    clear_bit(_VPF_blocked, &v->pause_flags);

 out:
    UNLOCK_BIGLOCK(v->domain);
    domain_unpause(d);

    return rc;
}
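
/*
 * VCPU hypercall: initialise a vcpu, bring it up or down, query its
 * runstate, program its timers, or inject an NMI. Unknown commands are
 * passed on to arch_do_vcpu_op().
 */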
long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    long rc = 0;

    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
        return -EINVAL;

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;

        if ( copy_from_guest(ctxt, arg, 1) )
        {
            xfree(ctxt);
            return -EFAULT;
        }

        LOCK_BIGLOCK(d);
        rc = -EEXIST;
        if ( !v->is_initialised )
            rc = boot_vcpu(d, vcpuid, ctxt);
        UNLOCK_BIGLOCK(d);

        xfree(ctxt);
        break;

    case VCPUOP_up:
        if ( !v->is_initialised )
            return -EINVAL;

        if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
            vcpu_wake(v);

        break;

    case VCPUOP_down:
        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
            vcpu_sleep_nosync(v);
        break;

    case VCPUOP_is_up:
        rc = !test_bit(_VPF_down, &v->pause_flags);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_send_nmi:
        if ( !guest_handle_is_null(arg) )
            return -EINVAL;

        if ( !test_and_set_bool(v->nmi_pending) )
            vcpu_kick(v);

        break;

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
    if ( type > MAX_VMASST_TYPE )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */