direct-io.hg

view xen/common/domain.c @ 14350:f3f5f2756d75

x86: Add VGCF_online flag to vcpu_guest_context.
Change common Xen code to start all VCPUs (except idle ones)
offline. Change arch code to deal with this.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Mar 12 13:53:43 2007 +0000 (2007-03-12)
parents 7f624c770dbd
children ba9d3fd4ee4b
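
As context for this change: every non-idle VCPU now starts offline (alloc_vcpu() below sets _VCPUF_down), so a guest must initialise and then explicitly bring up each secondary VCPU through the vcpu_op hypercall handled further down in do_vcpu_op(). The sketch below is illustrative only and not part of this file: HYPERVISOR_vcpu_op() is the usual guest-side wrapper for the hypercall, the context is assumed to be filled in already by the guest, and the new VGCF_online flag mentioned in the summary is consumed by arch_set_info_guest() rather than shown here.

/*
 * Illustrative sketch, not part of this changeset. Assumes the guest-side
 * HYPERVISOR_vcpu_op() wrapper and a vcpu_guest_context already prepared
 * by the guest (entry point, stack, page tables, ...).
 */
static int bring_up_secondary_vcpu(unsigned int cpu,
                                   struct vcpu_guest_context *ctxt)
{
    int rc;

    /* VCPUOP_initialise copies *ctxt into Xen and ends up in boot_vcpu(). */
    rc = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt);
    if ( rc != 0 )
        return rc;

    /* VCPUOP_up clears _VCPUF_down and wakes the VCPU (see do_vcpu_op()). */
    return HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
}
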
line source
1 /******************************************************************************
2 * domain.c
3 *
4 * Generic domain-handling functions.
5 */
7 #include <xen/config.h>
8 #include <xen/compat.h>
9 #include <xen/init.h>
10 #include <xen/lib.h>
11 #include <xen/errno.h>
12 #include <xen/sched.h>
13 #include <xen/domain.h>
14 #include <xen/mm.h>
15 #include <xen/event.h>
16 #include <xen/time.h>
17 #include <xen/console.h>
18 #include <xen/softirq.h>
19 #include <xen/domain_page.h>
20 #include <xen/rangeset.h>
21 #include <xen/guest_access.h>
22 #include <xen/hypercall.h>
23 #include <xen/delay.h>
24 #include <xen/shutdown.h>
25 #include <xen/percpu.h>
26 #include <xen/multicall.h>
27 #include <xen/rcupdate.h>
28 #include <asm/debugger.h>
29 #include <public/sched.h>
30 #include <public/vcpu.h>
32 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
33 DEFINE_SPINLOCK(domlist_update_lock);
34 DEFINE_RCU_READ_LOCK(domlist_read_lock);
36 #define DOMAIN_HASH_SIZE 256
37 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
38 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
39 struct domain *domain_list;
41 struct domain *dom0;
43 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
45 int current_domain_id(void)
46 {
47 return current->domain->domain_id;
48 }
50 struct domain *alloc_domain(domid_t domid)
51 {
52 struct domain *d;
54 if ( (d = xmalloc(struct domain)) == NULL )
55 return NULL;
57 memset(d, 0, sizeof(*d));
58 d->domain_id = domid;
59 atomic_set(&d->refcnt, 1);
60 spin_lock_init(&d->big_lock);
61 spin_lock_init(&d->page_alloc_lock);
62 spin_lock_init(&d->pause_lock);
63 INIT_LIST_HEAD(&d->page_list);
64 INIT_LIST_HEAD(&d->xenpage_list);
66 return d;
67 }
69 void free_domain(struct domain *d)
70 {
71 struct vcpu *v;
72 int i;
74 for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
75 {
76 if ( (v = d->vcpu[i]) == NULL )
77 continue;
78 vcpu_destroy(v);
79 sched_destroy_vcpu(v);
80 free_vcpu_struct(v);
81 }
83 sched_destroy_domain(d);
84 xfree(d);
85 }
87 struct vcpu *alloc_vcpu(
88 struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
89 {
90 struct vcpu *v;
92 BUG_ON(d->vcpu[vcpu_id] != NULL);
94 if ( (v = alloc_vcpu_struct()) == NULL )
95 return NULL;
97 v->domain = d;
98 v->vcpu_id = vcpu_id;
99 v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]);
100 spin_lock_init(&v->pause_lock);
102 v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
103 v->runstate.state_entry_time = NOW();
105 if ( !is_idle_domain(d) )
106 set_bit(_VCPUF_down, &v->vcpu_flags);
108 if ( sched_init_vcpu(v, cpu_id) != 0 )
109 {
110 free_vcpu_struct(v);
111 return NULL;
112 }
114 if ( vcpu_initialise(v) != 0 )
115 {
116 sched_destroy_vcpu(v);
117 free_vcpu_struct(v);
118 return NULL;
119 }
121 d->vcpu[vcpu_id] = v;
122 if ( vcpu_id != 0 )
123 d->vcpu[v->vcpu_id-1]->next_in_list = v;
125 return v;
126 }
128 struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
129 {
130 struct domain *d;
131 struct vcpu *v;
132 unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;
134 if ( (v = idle_vcpu[cpu_id]) != NULL )
135 return v;
137 d = (vcpu_id == 0) ?
138 domain_create(IDLE_DOMAIN_ID, 0) :
139 idle_vcpu[cpu_id - vcpu_id]->domain;
140 BUG_ON(d == NULL);
142 v = alloc_vcpu(d, vcpu_id, cpu_id);
143 idle_vcpu[cpu_id] = v;
145 return v;
146 }
148 struct domain *domain_create(domid_t domid, unsigned int domcr_flags)
149 {
150 struct domain *d, **pd;
152 if ( (d = alloc_domain(domid)) == NULL )
153 return NULL;
155 if ( domcr_flags & DOMCRF_hvm )
156 d->is_hvm = 1;
158 rangeset_domain_initialise(d);
160 if ( !is_idle_domain(d) )
161 {
162 set_bit(_DOMF_ctrl_pause, &d->domain_flags);
163 if ( evtchn_init(d) != 0 )
164 goto fail1;
165 if ( grant_table_create(d) != 0 )
166 goto fail2;
167 }
169 if ( arch_domain_create(d) != 0 )
170 goto fail3;
172 d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
173 d->irq_caps = rangeset_new(d, "Interrupts", 0);
174 if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
175 goto fail4;
177 if ( sched_init_domain(d) != 0 )
178 goto fail4;
180 if ( !is_idle_domain(d) )
181 {
182 spin_lock(&domlist_update_lock);
183 pd = &domain_list; /* NB. domain_list maintained in order of domid. */
184 for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
185 if ( (*pd)->domain_id > d->domain_id )
186 break;
187 d->next_in_list = *pd;
188 d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
189 /* Two RCU assignments are not atomic.
190 * Readers may see an inconsistent domlist and hash table.
191 * That is OK as long as each RCU reader-side critical section uses
192 * only one of them. */
193 rcu_assign_pointer(*pd, d);
194 rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
195 spin_unlock(&domlist_update_lock);
196 }
198 return d;
200 fail4:
201 arch_domain_destroy(d);
202 fail3:
203 if ( !is_idle_domain(d) )
204 grant_table_destroy(d);
205 fail2:
206 if ( !is_idle_domain(d) )
207 evtchn_destroy(d);
208 fail1:
209 rangeset_domain_destroy(d);
210 free_domain(d);
211 return NULL;
212 }
215 struct domain *get_domain_by_id(domid_t dom)
216 {
217 struct domain *d;
219 rcu_read_lock(&domlist_read_lock);
221 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
222 d != NULL;
223 d = rcu_dereference(d->next_in_hashbucket) )
224 {
225 if ( d->domain_id == dom )
226 {
227 if ( unlikely(!get_domain(d)) )
228 d = NULL;
229 break;
230 }
231 }
233 rcu_read_unlock(&domlist_read_lock);
235 return d;
236 }
239 struct domain *rcu_lock_domain_by_id(domid_t dom)
240 {
241 struct domain *d;
243 rcu_read_lock(&domlist_read_lock);
245 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
246 d != NULL;
247 d = rcu_dereference(d->next_in_hashbucket) )
248 {
249 if ( d->domain_id == dom )
250 return d;
251 }
253 rcu_read_unlock(&domlist_read_lock);
255 return NULL;
256 }
259 void domain_kill(struct domain *d)
260 {
261 domain_pause(d);
263 if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) )
264 return;
266 gnttab_release_mappings(d);
267 domain_relinquish_resources(d);
268 put_domain(d);
270 /* Kick page scrubbing after domain_relinquish_resources(). */
271 page_scrub_kick();
273 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
274 }
277 void __domain_crash(struct domain *d)
278 {
279 if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
280 {
281 /* Print nothing: the domain is already shutting down. */
282 }
283 else if ( d == current->domain )
284 {
285 printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
286 d->domain_id, current->vcpu_id, smp_processor_id());
287 show_execution_state(guest_cpu_user_regs());
288 }
289 else
290 {
291 printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
292 d->domain_id, current->domain->domain_id, smp_processor_id());
293 }
295 domain_shutdown(d, SHUTDOWN_crash);
296 }
299 void __domain_crash_synchronous(void)
300 {
301 __domain_crash(current->domain);
303 /*
304 * Flush multicall state before dying if a multicall is in progress.
305 * This shouldn't be necessary, but some architectures are calling
306 * domain_crash_synchronous() when they really shouldn't (i.e., from
307 * within hypercall context).
308 */
309 if ( this_cpu(mc_state).flags != 0 )
310 {
311 dprintk(XENLOG_ERR,
312 "FIXME: synchronous domain crash during a multicall!\n");
313 this_cpu(mc_state).flags = 0;
314 }
316 for ( ; ; )
317 do_softirq();
318 }
321 void domain_shutdown(struct domain *d, u8 reason)
322 {
323 struct vcpu *v;
325 if ( d->domain_id == 0 )
326 dom0_shutdown(reason);
328 if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
329 d->shutdown_code = reason;
331 for_each_vcpu ( d, v )
332 vcpu_sleep_nosync(v);
334 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
335 }
338 void domain_pause_for_debugger(void)
339 {
340 struct domain *d = current->domain;
341 struct vcpu *v;
343 set_bit(_DOMF_ctrl_pause, &d->domain_flags);
345 for_each_vcpu ( d, v )
346 vcpu_sleep_nosync(v);
348 send_guest_global_virq(dom0, VIRQ_DEBUGGER);
349 }
351 /* Complete domain destroy once RCU readers no longer hold old references. */
352 static void complete_domain_destroy(struct rcu_head *head)
353 {
354 struct domain *d = container_of(head, struct domain, rcu);
356 rangeset_domain_destroy(d);
358 evtchn_destroy(d);
359 grant_table_destroy(d);
361 arch_domain_destroy(d);
363 free_domain(d);
365 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
366 }
368 /* Release resources belonging to domain @d. */
369 void domain_destroy(struct domain *d)
370 {
371 struct domain **pd;
372 atomic_t old, new;
374 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags));
376 /* May be already destroyed, or get_domain() can race us. */
377 _atomic_set(old, 0);
378 _atomic_set(new, DOMAIN_DESTROYED);
379 old = atomic_compareandswap(old, new, &d->refcnt);
380 if ( _atomic_read(old) != 0 )
381 return;
383 /* Delete from task list and task hashtable. */
384 spin_lock(&domlist_update_lock);
385 pd = &domain_list;
386 while ( *pd != d )
387 pd = &(*pd)->next_in_list;
388 rcu_assign_pointer(*pd, d->next_in_list);
389 pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
390 while ( *pd != d )
391 pd = &(*pd)->next_in_hashbucket;
392 rcu_assign_pointer(*pd, d->next_in_hashbucket);
393 spin_unlock(&domlist_update_lock);
395 /* Schedule RCU asynchronous completion of domain destroy. */
396 call_rcu(&d->rcu, complete_domain_destroy);
397 }
399 static void vcpu_pause_setup(struct vcpu *v)
400 {
401 spin_lock(&v->pause_lock);
402 if ( v->pause_count++ == 0 )
403 set_bit(_VCPUF_paused, &v->vcpu_flags);
404 spin_unlock(&v->pause_lock);
405 }
407 void vcpu_pause(struct vcpu *v)
408 {
409 ASSERT(v != current);
410 vcpu_pause_setup(v);
411 vcpu_sleep_sync(v);
412 }
414 void vcpu_pause_nosync(struct vcpu *v)
415 {
416 vcpu_pause_setup(v);
417 vcpu_sleep_nosync(v);
418 }
420 void vcpu_unpause(struct vcpu *v)
421 {
422 int wake;
424 ASSERT(v != current);
426 spin_lock(&v->pause_lock);
427 wake = (--v->pause_count == 0);
428 if ( wake )
429 clear_bit(_VCPUF_paused, &v->vcpu_flags);
430 spin_unlock(&v->pause_lock);
432 if ( wake )
433 vcpu_wake(v);
434 }
436 void domain_pause(struct domain *d)
437 {
438 struct vcpu *v;
440 ASSERT(d != current->domain);
442 spin_lock(&d->pause_lock);
443 if ( d->pause_count++ == 0 )
444 set_bit(_DOMF_paused, &d->domain_flags);
445 spin_unlock(&d->pause_lock);
447 for_each_vcpu( d, v )
448 vcpu_sleep_sync(v);
449 }
451 void domain_unpause(struct domain *d)
452 {
453 struct vcpu *v;
454 int wake;
456 ASSERT(d != current->domain);
458 spin_lock(&d->pause_lock);
459 wake = (--d->pause_count == 0);
460 if ( wake )
461 clear_bit(_DOMF_paused, &d->domain_flags);
462 spin_unlock(&d->pause_lock);
464 if ( wake )
465 for_each_vcpu( d, v )
466 vcpu_wake(v);
467 }
469 void domain_pause_by_systemcontroller(struct domain *d)
470 {
471 struct vcpu *v;
473 BUG_ON(current->domain == d);
475 if ( !test_and_set_bit(_DOMF_ctrl_pause, &d->domain_flags) )
476 {
477 for_each_vcpu ( d, v )
478 vcpu_sleep_sync(v);
479 }
480 }
482 void domain_unpause_by_systemcontroller(struct domain *d)
483 {
484 struct vcpu *v;
486 if ( test_and_clear_bit(_DOMF_ctrl_pause, &d->domain_flags) )
487 {
488 for_each_vcpu ( d, v )
489 vcpu_wake(v);
490 }
491 }
493 int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
494 {
495 struct vcpu *v = d->vcpu[vcpuid];
497 BUG_ON(test_bit(_VCPUF_initialised, &v->vcpu_flags));
499 return arch_set_info_guest(v, ctxt);
500 }
502 int vcpu_reset(struct vcpu *v)
503 {
504 struct domain *d = v->domain;
505 int rc;
507 domain_pause(d);
508 LOCK_BIGLOCK(d);
510 rc = arch_vcpu_reset(v);
511 if ( rc != 0 )
512 goto out;
514 set_bit(_VCPUF_down, &v->vcpu_flags);
516 clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
517 clear_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags);
518 clear_bit(_VCPUF_blocked, &v->vcpu_flags);
519 clear_bit(_VCPUF_initialised, &v->vcpu_flags);
520 clear_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
521 clear_bit(_VCPUF_nmi_masked, &v->vcpu_flags);
522 clear_bit(_VCPUF_polling, &v->vcpu_flags);
524 out:
525 UNLOCK_BIGLOCK(v->domain);
526 domain_unpause(d);
528 return rc;
529 }
532 long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
533 {
534 struct domain *d = current->domain;
535 struct vcpu *v;
536 struct vcpu_guest_context *ctxt;
537 long rc = 0;
539 if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
540 return -EINVAL;
542 if ( (v = d->vcpu[vcpuid]) == NULL )
543 return -ENOENT;
545 switch ( cmd )
546 {
547 case VCPUOP_initialise:
548 if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
549 return -ENOMEM;
551 if ( copy_from_guest(ctxt, arg, 1) )
552 {
553 xfree(ctxt);
554 return -EFAULT;
555 }
557 LOCK_BIGLOCK(d);
558 rc = -EEXIST;
559 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
560 rc = boot_vcpu(d, vcpuid, ctxt);
561 UNLOCK_BIGLOCK(d);
563 xfree(ctxt);
564 break;
566 case VCPUOP_up:
567 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
568 return -EINVAL;
570 if ( test_and_clear_bit(_VCPUF_down, &v->vcpu_flags) )
571 vcpu_wake(v);
573 break;
575 case VCPUOP_down:
576 if ( !test_and_set_bit(_VCPUF_down, &v->vcpu_flags) )
577 vcpu_sleep_nosync(v);
578 break;
580 case VCPUOP_is_up:
581 rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
582 break;
584 case VCPUOP_get_runstate_info:
585 {
586 struct vcpu_runstate_info runstate;
587 vcpu_runstate_get(v, &runstate);
588 if ( copy_to_guest(arg, &runstate, 1) )
589 rc = -EFAULT;
590 break;
591 }
593 case VCPUOP_set_periodic_timer:
594 {
595 struct vcpu_set_periodic_timer set;
597 if ( copy_from_guest(&set, arg, 1) )
598 return -EFAULT;
600 if ( set.period_ns < MILLISECS(1) )
601 return -EINVAL;
603 v->periodic_period = set.period_ns;
604 vcpu_force_reschedule(v);
606 break;
607 }
609 case VCPUOP_stop_periodic_timer:
610 {
611 v->periodic_period = 0;
612 vcpu_force_reschedule(v);
613 break;
614 }
616 case VCPUOP_set_singleshot_timer:
617 {
618 struct vcpu_set_singleshot_timer set;
620 if ( v != current )
621 return -EINVAL;
623 if ( copy_from_guest(&set, arg, 1) )
624 return -EFAULT;
626 if ( (set.flags & VCPU_SSHOTTMR_future) &&
627 (set.timeout_abs_ns < NOW()) )
628 return -ETIME;
630 if ( v->singleshot_timer.cpu != smp_processor_id() )
631 {
632 stop_timer(&v->singleshot_timer);
633 v->singleshot_timer.cpu = smp_processor_id();
634 }
636 set_timer(&v->singleshot_timer, set.timeout_abs_ns);
638 break;
639 }
641 case VCPUOP_stop_singleshot_timer:
642 {
643 if ( v != current )
644 return -EINVAL;
646 stop_timer(&v->singleshot_timer);
647 break;
648 }
650 default:
651 rc = arch_do_vcpu_op(cmd, v, arg);
652 break;
653 }
655 return rc;
656 }
658 long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
659 {
660 if ( type > MAX_VMASST_TYPE )
661 return -EINVAL;
663 switch ( cmd )
664 {
665 case VMASST_CMD_enable:
666 set_bit(type, &p->vm_assist);
667 return 0;
668 case VMASST_CMD_disable:
669 clear_bit(type, &p->vm_assist);
670 return 0;
671 }
673 return -ENOSYS;
674 }
676 /*
677 * Local variables:
678 * mode: C
679 * c-set-style: "BSD"
680 * c-basic-offset: 4
681 * tab-width: 4
682 * indent-tabs-mode: nil
683 * End:
684 */
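
One caller-side note on the two lookup helpers above, as a hedged sketch rather than code from this tree: get_domain_by_id() only returns a domain on which get_domain() succeeded, so the caller holds a reference and must drop it with put_domain() (the same helper domain_kill() uses above), whereas rcu_lock_domain_by_id() instead leaves the RCU read-side critical section held on success.

/* Illustrative caller pattern for get_domain_by_id(); not part of domain.c. */
static long example_domain_query(domid_t dom)
{
    struct domain *d;
    long rc = 0;

    if ( (d = get_domain_by_id(dom)) == NULL )
        return -ESRCH;

    /* ... use d here; the held reference keeps complete_domain_destroy()
     * from running underneath us ... */

    put_domain(d);
    return rc;
}
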