ia64/xen-unstable: xen/common/domain.c @ 14635:5c52e5ca8459

hvm: Clean up handling of exception intercepts.
Only intercept #DB/#BP if a debugger is attached.
Signed-off-by: Keir Fraser <keir@xensource.com>

author   Keir Fraser <keir@xensource.com>
date     Wed Mar 28 18:47:17 2007 +0100 (2007-03-28)
parents  ba9d3fd4ee4b
children 4b13fc910acf

/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/config.h>
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *dom0;

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

int current_domain_id(void)
{
    return current->domain->domain_id;
}

struct domain *alloc_domain(domid_t domid)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));
    d->domain_id = domid;
    atomic_set(&d->refcnt, 1);
    spin_lock_init(&d->big_lock);
    spin_lock_init(&d->page_alloc_lock);
    spin_lock_init(&d->pause_lock);
    INIT_LIST_HEAD(&d->page_list);
    INIT_LIST_HEAD(&d->xenpage_list);

    return d;
}

void free_domain(struct domain *d)
{
    struct vcpu *v;
    int i;

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
    }

    sched_destroy_domain(d);
    xfree(d);
}

struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;
    spin_lock_init(&v->pause_lock);

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( !is_idle_domain(d) )
    {
        set_bit(_VCPUF_down, &v->vcpu_flags);
        v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    return v;
}

struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;

    if ( (v = idle_vcpu[cpu_id]) != NULL )
        return v;

    d = (vcpu_id == 0) ?
        domain_create(IDLE_DOMAIN_ID, 0) :
        idle_vcpu[cpu_id - vcpu_id]->domain;
    BUG_ON(d == NULL);

    v = alloc_vcpu(d, vcpu_id, cpu_id);
    idle_vcpu[cpu_id] = v;

    return v;
}

struct domain *domain_create(domid_t domid, unsigned int domcr_flags)
{
    struct domain *d, **pd;

    if ( (d = alloc_domain(domid)) == NULL )
        return NULL;

    if ( domcr_flags & DOMCRF_hvm )
        d->is_hvm = 1;

    rangeset_domain_initialise(d);

    if ( !is_idle_domain(d) )
    {
        set_bit(_DOMF_ctrl_pause, &d->domain_flags);
        if ( evtchn_init(d) != 0 )
            goto fail1;
        if ( grant_table_create(d) != 0 )
            goto fail2;
    }

    if ( arch_domain_create(d) != 0 )
        goto fail3;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail4;

    if ( sched_init_domain(d) != 0 )
        goto fail4;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        /*
         * The two RCU assignments below are not atomic with one another, so
         * readers may see an inconsistent domain_list and hash table. That is
         * OK as long as each RCU reader-side critical section uses only one
         * of them.
         */
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail4:
    arch_domain_destroy(d);
 fail3:
    if ( !is_idle_domain(d) )
        grant_table_destroy(d);
 fail2:
    if ( !is_idle_domain(d) )
        evtchn_destroy(d);
 fail1:
    rangeset_domain_destroy(d);
    free_domain(d);
    return NULL;
}

struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}

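/*
 * Unlike get_domain_by_id(), rcu_lock_domain_by_id() takes no reference on
 * the domain: on success it returns with the RCU read lock still held, and
 * the caller is responsible for releasing it once finished with the pointer.
 */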
struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
            return d;
    }

    rcu_read_unlock(&domlist_read_lock);

    return NULL;
}

void domain_kill(struct domain *d)
{
    domain_pause(d);

    if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) )
        return;

    gnttab_release_mappings(d);
    domain_relinquish_resources(d);
    put_domain(d);

    /* Kick page scrubbing after domain_relinquish_resources(). */
    page_scrub_kick();

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

void __domain_crash(struct domain *d)
{
    if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}

void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    /*
     * Flush multicall state before dying if a multicall is in progress.
     * This shouldn't be necessary, but some architectures are calling
     * domain_crash_synchronous() when they really shouldn't (i.e., from
     * within hypercall context).
     */
    if ( this_cpu(mc_state).flags != 0 )
    {
        dprintk(XENLOG_ERR,
                "FIXME: synchronous domain crash during a multicall!\n");
        this_cpu(mc_state).flags = 0;
    }

    for ( ; ; )
        do_softirq();
}

void domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

    if ( d->domain_id == 0 )
        dom0_shutdown(reason);

    if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
        d->shutdown_code = reason;

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

void domain_pause_for_debugger(void)
{
    struct domain *d = current->domain;
    struct vcpu *v;

    set_bit(_DOMF_ctrl_pause, &d->domain_flags);

    for_each_vcpu ( d, v )
        vcpu_sleep_nosync(v);

    send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}

__attribute__ ((weak)) void domain_debug_state_changed(struct domain *d) { }

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);

    rangeset_domain_destroy(d);

    evtchn_destroy(d);
    grant_table_destroy(d);

    arch_domain_destroy(d);

    free_domain(d);

    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
}

/* Release resources belonging to domain @d. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;
    atomic_t old, new;

    BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags));

    /* May be already destroyed, or get_domain() can race us. */
    _atomic_set(old, 0);
    _atomic_set(new, DOMAIN_DESTROYED);
    old = atomic_compareandswap(old, new, &d->refcnt);
    if ( _atomic_read(old) != 0 )
        return;

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

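/*
 * The pause operations below nest: each *_pause() increments a pause count
 * and each *_unpause() decrements it, so the paused flag is cleared (and the
 * affected VCPUs woken) only when the count returns to zero.
 */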
static void vcpu_pause_setup(struct vcpu *v)
{
    spin_lock(&v->pause_lock);
    if ( v->pause_count++ == 0 )
        set_bit(_VCPUF_paused, &v->vcpu_flags);
    spin_unlock(&v->pause_lock);
}

void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    vcpu_pause_setup(v);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    vcpu_pause_setup(v);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    int wake;

    ASSERT(v != current);

    spin_lock(&v->pause_lock);
    wake = (--v->pause_count == 0);
    if ( wake )
        clear_bit(_VCPUF_paused, &v->vcpu_flags);
    spin_unlock(&v->pause_lock);

    if ( wake )
        vcpu_wake(v);
}

void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);

    spin_lock(&d->pause_lock);
    if ( d->pause_count++ == 0 )
        set_bit(_DOMF_paused, &d->domain_flags);
    spin_unlock(&d->pause_lock);

    for_each_vcpu( d, v )
        vcpu_sleep_sync(v);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;
    int wake;

    ASSERT(d != current->domain);

    spin_lock(&d->pause_lock);
    wake = (--d->pause_count == 0);
    if ( wake )
        clear_bit(_DOMF_paused, &d->domain_flags);
    spin_unlock(&d->pause_lock);

    if ( wake )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

void domain_pause_by_systemcontroller(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(current->domain == d);

    if ( !test_and_set_bit(_DOMF_ctrl_pause, &d->domain_flags) )
    {
        for_each_vcpu ( d, v )
            vcpu_sleep_sync(v);
    }
}

void domain_unpause_by_systemcontroller(struct domain *d)
{
    struct vcpu *v;

    if ( test_and_clear_bit(_DOMF_ctrl_pause, &d->domain_flags) )
    {
        for_each_vcpu ( d, v )
            vcpu_wake(v);
    }
}

int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
{
    struct vcpu *v = d->vcpu[vcpuid];

    BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));

    return arch_set_info_guest(v, ctxt);
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    domain_pause(d);
    LOCK_BIGLOCK(d);

    rc = arch_vcpu_reset(v);
    if ( rc != 0 )
        goto out;

    set_bit(_VCPUF_down, &v->vcpu_flags);

    clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
    clear_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags);
    clear_bit(_VCPUF_blocked, &v->vcpu_flags);
    clear_bit(_VCPUF_initialised, &v->vcpu_flags);
    clear_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
    clear_bit(_VCPUF_nmi_masked, &v->vcpu_flags);
    clear_bit(_VCPUF_polling, &v->vcpu_flags);

 out:
    UNLOCK_BIGLOCK(v->domain);
    domain_unpause(d);

    return rc;
}

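/*
 * do_vcpu_op() handles the VCPUOP_* hypercalls; it operates only on VCPUs
 * belonging to the calling guest's own domain.
 */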
long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    struct vcpu_guest_context *ctxt;
    long rc = 0;

    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
        return -EINVAL;

    if ( (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;

        if ( copy_from_guest(ctxt, arg, 1) )
        {
            xfree(ctxt);
            return -EFAULT;
        }

        LOCK_BIGLOCK(d);
        rc = -EEXIST;
        if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
            rc = boot_vcpu(d, vcpuid, ctxt);
        UNLOCK_BIGLOCK(d);

        xfree(ctxt);
        break;

    case VCPUOP_up:
        if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
            return -EINVAL;

        if ( test_and_clear_bit(_VCPUF_down, &v->vcpu_flags) )
            vcpu_wake(v);

        break;

    case VCPUOP_down:
        if ( !test_and_set_bit(_VCPUF_down, &v->vcpu_flags) )
            vcpu_sleep_nosync(v);
        break;

    case VCPUOP_is_up:
        rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
    {
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;
    }

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
    {
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);
        break;
    }

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
    if ( type > MAX_VMASST_TYPE )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */