xen/arch/x86/domain.c @ 3739:4412ac39cc85 (ia64/xen-unstable)

bitkeeper revision 1.1159.255.1 (420911c0Dpqzcuh7CS1OpkVP4miSbQ)

misc debug audit cleanups

Signed-off-by: ian.pratt@cl.cam.ac.uk

author   iap10@freefall.cl.cam.ac.uk
date     Tue Feb 08 19:23:44 2005 +0000
parents  0703289cadc8
children d1e1c9854420

/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/******************************************************************************
 * arch/x86/domain.c
 *
 * x86-specific domain handling (e.g., register setup and context switching).
 */

/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <asm/shadow.h>
#include <xen/console.h>
#include <xen/elf.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <xen/multicall.h>

/* opt_noreboot: If true, machine will need manual reset on error. */
static int opt_noreboot = 0;
boolean_param("noreboot", opt_noreboot);
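
/*
 * Idle until the next interrupt unless a softirq is already pending on this
 * CPU, in which case interrupts are simply re-enabled so the caller can go
 * and process it.
 */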
static void default_idle(void)
{
    __cli();
    if ( !softirq_pending(smp_processor_id()) )
        safe_halt();
    else
        __sti();
}

static __attribute_used__ void idle_loop(void)
{
    int cpu = smp_processor_id();
    for ( ; ; )
    {
        irq_stat[cpu].idle_timestamp = jiffies;
        while ( !softirq_pending(cpu) )
            default_idle();
        do_softirq();
    }
}

void startup_cpu_idle_loop(void)
{
    /* Just some sanity to ensure that the scheduler is set up okay. */
    ASSERT(current->domain->id == IDLE_DOMAIN_ID);
    domain_unpause_by_systemcontroller(current->domain);
    __enter_scheduler();

    /*
     * Declare this CPU's setup done to the boot processor: the memory
     * barrier ensures that the state is visible before we proceed.
     */
    smp_mb();
    init_idle();

    idle_loop();
}

static long no_idt[2];
static int reboot_mode;
int reboot_thru_bios = 0;

#ifdef CONFIG_SMP
int reboot_smp = 0;
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
#endif
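
/*
 * Wait for the keyboard controller's input buffer to drain (bit 1 of the
 * status port, 0x64, clears), giving up after a bounded number of polls.
 */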
static inline void kb_wait(void)
{
    int i;

    for (i=0; i<0x10000; i++)
        if ((inb_p(0x64) & 0x02) == 0)
            break;
}

void machine_restart(char * __unused)
{
#ifdef CONFIG_SMP
    int cpuid;
#endif

    if ( opt_noreboot )
    {
        printk("Reboot disabled on cmdline: require manual reset\n");
        for ( ; ; ) __asm__ __volatile__ ("hlt");
    }

#ifdef CONFIG_SMP
    cpuid = GET_APIC_ID(apic_read(APIC_ID));

    /* KAF: Need interrupts enabled for safe IPI. */
    __sti();

    if (reboot_smp) {
        /* Check to see if reboot_cpu is valid;
           if it's not, default to the BSP. */
        if ((reboot_cpu == -1) ||
            (reboot_cpu > (NR_CPUS - 1)) ||
            !(phys_cpu_present_map & (1<<cpuid)))
            reboot_cpu = boot_cpu_physical_apicid;

        reboot_smp = 0; /* use this as a flag to only go through this once */
        /* Re-run this function on the other CPUs: they will fall through
           this section since we have cleared reboot_smp, and do the reboot
           if they are the correct CPU; otherwise they halt. */
        if (reboot_cpu != cpuid)
            smp_call_function((void *)machine_restart, NULL, 1, 0);
    }

    /* If reboot_cpu is still -1, then we want a traditional reboot,
       and if we are not running on the reboot_cpu, halt. */
    if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
        for (;;)
            __asm__ __volatile__ ("hlt");
    }
    /*
     * Stop all CPUs and turn off local APICs and the IO-APIC, so
     * other OSs see a clean IRQ state.
     */
    smp_send_stop();
    disable_IO_APIC();
#endif
#ifdef CONFIG_VMX
    stop_vmx();
#endif

    if (!reboot_thru_bios) {
        /* Rebooting needs to touch the page at absolute address 0. */
        *((unsigned short *)__va(0x472)) = reboot_mode;
        for (;;) {
            int i;
            for (i=0; i<100; i++) {
                kb_wait();
                udelay(50);
                outb(0xfe,0x64); /* pulse reset low */
                udelay(50);
            }
            /* That didn't work - force a triple fault.. */
            __asm__ __volatile__ ("lidt %0" : : "m" (no_idt));
            __asm__ __volatile__ ("int3");
        }
    }

    panic("Need to reinclude BIOS reboot code\n");
}

void __attribute__((noreturn)) __machine_halt(void *unused)
{
    for ( ; ; )
        __asm__ __volatile__ ( "cli; hlt" );
}

void machine_halt(void)
{
    smp_call_function(__machine_halt, NULL, 1, 1);
    __machine_halt(NULL);
}

void dump_pageframe_info(struct domain *d)
{
    struct pfn_info *page;

    if ( d->tot_pages < 10 )
    {
        list_for_each_entry ( page, &d->page_list, list )
        {
            printk("Page %08x: caf=%08x, taf=%08x\n",
                   page_to_phys(page), page->count_info,
                   page->u.inuse.type_info);
        }
    }

    page = virt_to_page(d->shared_info);
    printk("Shared_info@%08x: caf=%08x, taf=%08x\n",
           page_to_phys(page), page->count_info, page->u.inuse.type_info);
}

struct domain *arch_alloc_domain_struct(void)
{
    return xmalloc(struct domain);
}

void arch_free_domain_struct(struct domain *d)
{
    xfree(d);
}

struct exec_domain *arch_alloc_exec_domain_struct(void)
{
    return xmalloc(struct exec_domain);
}

void arch_free_exec_domain_struct(struct exec_domain *ed)
{
    xfree(ed);
}

void free_perdomain_pt(struct domain *d)
{
    free_xenheap_page((unsigned long)d->arch.mm_perdomain_pt);
}

static void continue_idle_task(struct exec_domain *ed)
{
    reset_stack_and_jump(idle_loop);
}

static void continue_nonidle_task(struct exec_domain *ed)
{
    reset_stack_and_jump(ret_from_intr);
}
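
/*
 * Architecture-specific domain creation. The idle domain merely gets a
 * schedule tail; all others also get a zeroed shared_info page and a
 * per-domain page table, neither of which is given a machine-to-phys
 * translation (hence the INVALID_P2M_ENTRY markers below).
 */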
void arch_do_createdomain(struct exec_domain *ed)
{
    struct domain *d = ed->domain;

    SET_DEFAULT_FAST_TRAP(&ed->arch);

    if ( d->id == IDLE_DOMAIN_ID )
    {
        ed->arch.schedule_tail = continue_idle_task;
    }
    else
    {
        ed->arch.schedule_tail = continue_nonidle_task;

        d->shared_info = (void *)alloc_xenheap_page();
        memset(d->shared_info, 0, PAGE_SIZE);
        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
        SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
        machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
                                PAGE_SHIFT] = INVALID_P2M_ENTRY;

        d->arch.mm_perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
        memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
        machine_to_phys_mapping[virt_to_phys(d->arch.mm_perdomain_pt) >>
                                PAGE_SHIFT] = INVALID_P2M_ENTRY;
        ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
    }
}
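
/*
 * Bring up a secondary VCPU: it inherits the schedule tail from VCPU0 and
 * gets its own slice of the per-domain page table.
 */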
void arch_do_boot_vcpu(struct exec_domain *ed)
{
    struct domain *d = ed->domain;
    ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail;
    ed->arch.perdomain_ptes =
        d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
}

#ifdef CONFIG_VMX
void arch_vmx_do_resume(struct exec_domain *ed)
{
    u64 vmcs_phys_ptr = (u64) virt_to_phys(ed->arch.arch_vmx.vmcs);

    load_vmcs(&ed->arch.arch_vmx, vmcs_phys_ptr);
    vmx_do_resume(ed);
    reset_stack_and_jump(vmx_asm_do_resume);
}

void arch_vmx_do_launch(struct exec_domain *ed)
{
    u64 vmcs_phys_ptr = (u64) virt_to_phys(ed->arch.arch_vmx.vmcs);

    load_vmcs(&ed->arch.arch_vmx, vmcs_phys_ptr);
    vmx_do_launch(ed);
    reset_stack_and_jump(vmx_asm_do_launch);
}
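
/*
 * Build the monitor page table for a VMX domain: a fresh L2 page that maps
 * the hypervisor area (copied from idle_pg_table) plus the per-domain
 * entries, with the guest's current phys table copied into the per-domain
 * L1. Also puts the domain into full 32-bit shadow mode (SHM_full_32).
 */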
static void monitor_mk_pagetable(struct exec_domain *ed)
{
    unsigned long mpfn;
    l2_pgentry_t *mpl2e, *phys_table;
    struct pfn_info *mpfn_info;
    struct domain *d = ed->domain;

    mpfn_info = alloc_domheap_page(NULL);
    ASSERT( mpfn_info );

    mpfn = (unsigned long) (mpfn_info - frame_table);
    mpl2e = (l2_pgentry_t *) map_domain_mem(mpfn << PAGE_SHIFT);
    memset(mpl2e, 0, PAGE_SIZE);

    memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));

    ed->arch.monitor_table = mk_pagetable(mpfn << PAGE_SHIFT);
    d->arch.shadow_mode = SHM_full_32;

    mpl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((__pa(d->arch.mm_perdomain_pt) & PAGE_MASK)
                      | __PAGE_HYPERVISOR);

    phys_table = (l2_pgentry_t *) map_domain_mem(pagetable_val(
        ed->arch.phys_table));
    memcpy(d->arch.mm_perdomain_pt, phys_table,
           ENTRIES_PER_L1_PAGETABLE * sizeof(l1_pgentry_t));

    unmap_domain_mem(phys_table);
    unmap_domain_mem(mpl2e);
}

/*
 * Free the pages for monitor_table and guest_pl2e_cache.
 */
static void monitor_rm_pagetable(struct exec_domain *ed)
{
    l2_pgentry_t *mpl2e;
    unsigned long mpfn;

    ASSERT( pagetable_val(ed->arch.monitor_table) );

    mpl2e = (l2_pgentry_t *)
        map_domain_mem(pagetable_val(ed->arch.monitor_table));

    /*
     * First get the pfn for guest_pl2e_cache by looking at monitor_table.
     */
    mpfn = l2_pgentry_val(mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])
        >> PAGE_SHIFT;

    free_domheap_page(&frame_table[mpfn]);
    unmap_domain_mem(mpl2e);

    /*
     * Then free monitor_table.
     */
    mpfn = (pagetable_val(ed->arch.monitor_table)) >> PAGE_SHIFT;
    free_domheap_page(&frame_table[mpfn]);

    ed->arch.monitor_table = mk_pagetable(0);
}
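
/*
 * Final VMX-specific setup: allocate and construct the VMCS, build the
 * monitor page table, and (once per domain) mask every event channel
 * except the device model's IOPACKET_PORT.
 */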
static int vmx_final_setup_guestos(struct exec_domain *ed,
                                   full_execution_context_t *full_context)
{
    int error;
    execution_context_t *context;
    struct vmcs_struct *vmcs;

    context = &full_context->cpu_ctxt;

    /*
     * Create a new VMCS.
     */
    if (!(vmcs = alloc_vmcs())) {
        printk("Failed to create a new VMCS\n");
        return -ENOMEM;
    }

    memset(&ed->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct));

    ed->arch.arch_vmx.vmcs = vmcs;
    error = construct_vmcs(
        &ed->arch.arch_vmx, context, full_context, VMCS_USE_HOST_ENV);
    if (error < 0) {
        printk("Failed to construct a new VMCS\n");
        goto out;
    }

    monitor_mk_pagetable(ed);
    ed->arch.schedule_tail = arch_vmx_do_launch;
    clear_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state);

#if defined(__i386__)
    ed->arch.arch_vmx.vmx_platform.real_mode_data =
        (unsigned long *) context->esi;
#endif

    if (ed == ed->domain->exec_domain[0]) {
        /*
         * Required to do this once per domain.
         */
        memset(&ed->domain->shared_info->evtchn_mask[0], 0xff,
               sizeof(ed->domain->shared_info->evtchn_mask));
        clear_bit(IOPACKET_PORT, &ed->domain->shared_info->evtchn_mask[0]);
    }

    return 0;

 out:
    free_vmcs(vmcs);
    ed->arch.arch_vmx.vmcs = 0;
    return error;
}
#endif

int arch_final_setup_guestos(
    struct exec_domain *d, full_execution_context_t *c)
{
    unsigned long phys_basetab;
    int i, rc;

    clear_bit(EDF_DONEFPUINIT, &d->ed_flags);
    if ( c->flags & ECF_I387_VALID )
        set_bit(EDF_DONEFPUINIT, &d->ed_flags);

    memcpy(&d->arch.user_ctxt,
           &c->cpu_ctxt,
           sizeof(d->arch.user_ctxt));

    /* Clear IOPL for unprivileged domains. */
    if (!IS_PRIV(d->domain))
        d->arch.user_ctxt.eflags &= 0xffffcfff;

    /*
     * This is sufficient! If the descriptor DPL differs from CS RPL then
     * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared
     * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP.
     */
    if (!(c->flags & ECF_VMX_GUEST))
        if ( ((d->arch.user_ctxt.cs & 3) == 0) ||
             ((d->arch.user_ctxt.ss & 3) == 0) )
            return -EINVAL;

    memcpy(&d->arch.i387,
           &c->fpu_ctxt,
           sizeof(d->arch.i387));

    memcpy(d->arch.traps,
           &c->trap_ctxt,
           sizeof(d->arch.traps));

    if ( (rc = (int)set_fast_trap(d, c->fast_trap_idx)) != 0 )
        return rc;

    d->arch.ldt_base = c->ldt_base;
    d->arch.ldt_ents = c->ldt_ents;

    d->arch.guestos_ss = c->guestos_ss;
    d->arch.guestos_sp = c->guestos_esp;

    for ( i = 0; i < 8; i++ )
        (void)set_debugreg(d, i, c->debugreg[i]);

    d->arch.event_selector    = c->event_callback_cs;
    d->arch.event_address     = c->event_callback_eip;
    d->arch.failsafe_selector = c->failsafe_callback_cs;
    d->arch.failsafe_address  = c->failsafe_callback_eip;

    phys_basetab = c->pt_base;
    d->arch.pagetable = mk_pagetable(phys_basetab);
    d->arch.phys_table = d->arch.pagetable;
    if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d->domain,
                            PGT_base_page_table) )
        return -EINVAL;

    /* Failure to set GDT is harmless. */
    SET_GDT_ENTRIES(d, DEFAULT_GDT_ENTRIES);
    SET_GDT_ADDRESS(d, DEFAULT_GDT_ADDRESS);
    if ( c->gdt_ents != 0 )
    {
        if ( (rc = (int)set_gdt(d, c->gdt_frames, c->gdt_ents)) != 0 )
        {
            put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
            return rc;
        }
    }

#ifdef CONFIG_VMX
    if (c->flags & ECF_VMX_GUEST)
        return vmx_final_setup_guestos(d, c);
#endif

    return 0;
}

void new_thread(struct exec_domain *d,
                unsigned long start_pc,
                unsigned long start_stack,
                unsigned long start_info)
{
    execution_context_t *ec = &d->arch.user_ctxt;

    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_GUESTOS_DS
     *       CS:EIP = FLAT_GUESTOS_CS:start_pc
     *       SS:ESP = FLAT_GUESTOS_SS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     */
    ec->ds = ec->es = ec->fs = ec->gs = FLAT_GUESTOS_DS;
    ec->ss = FLAT_GUESTOS_SS;
    ec->cs = FLAT_GUESTOS_CS;
    ec->eip = start_pc;
    ec->esp = start_stack;
    ec->esi = start_info;

    __save_flags(ec->eflags);
    ec->eflags |= X86_EFLAGS_IF;
}

/*
 * This special macro can be used to load a debugging register.
 */
#define loaddebug(_ed,_reg)                     \
    __asm__("mov %0,%%db" #_reg                 \
            : /* no output */                   \
            : "r" ((_ed)->debugreg[_reg]))
void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p)
{
    struct tss_struct *tss = init_tss + smp_processor_id();
    execution_context_t *stack_ec = get_execution_context();
    int i;
#ifdef CONFIG_VMX
    unsigned long vmx_domain = next_p->arch.arch_vmx.flags;
#endif

    __cli();

    /* Switch guest general-register state. */
    if ( !is_idle_task(prev_p->domain) )
    {
        memcpy(&prev_p->arch.user_ctxt,
               stack_ec,
               sizeof(*stack_ec));
        unlazy_fpu(prev_p);
        CLEAR_FAST_TRAP(&prev_p->arch);
    }

    if ( !is_idle_task(next_p->domain) )
    {
        memcpy(stack_ec,
               &next_p->arch.user_ctxt,
               sizeof(*stack_ec));

        /* Maybe switch the debug registers. */
        if ( unlikely(next_p->arch.debugreg[7]) )
        {
            loaddebug(&next_p->arch, 0);
            loaddebug(&next_p->arch, 1);
            loaddebug(&next_p->arch, 2);
            loaddebug(&next_p->arch, 3);
            /* no 4 and 5 */
            loaddebug(&next_p->arch, 6);
            loaddebug(&next_p->arch, 7);
        }

#ifdef CONFIG_VMX
        if ( vmx_domain )
        {
            /* Switch page tables. */
            write_ptbase(next_p);

            set_current(next_p);
            /* Switch GDT and LDT. */
            __asm__ __volatile__ ("lgdt %0" : : "m" (*next_p->arch.gdt));

            __sti();
            return;
        }
#endif

        SET_FAST_TRAP(&next_p->arch);

#ifdef __i386__
        /* Switch the guest OS ring-1 stack. */
        tss->esp1 = next_p->arch.guestos_sp;
        tss->ss1  = next_p->arch.guestos_ss;
#endif

        /* Switch page tables. */
        write_ptbase(next_p);
    }

    if ( unlikely(prev_p->arch.io_bitmap != NULL) )
    {
        for ( i = 0; i < sizeof(prev_p->arch.io_bitmap_sel) * 8; i++ )
            if ( !test_bit(i, &prev_p->arch.io_bitmap_sel) )
                memset(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
                       ~0U, IOBMP_BYTES_PER_SELBIT);
        tss->bitmap = IOBMP_INVALID_OFFSET;
    }

    if ( unlikely(next_p->arch.io_bitmap != NULL) )
    {
        for ( i = 0; i < sizeof(next_p->arch.io_bitmap_sel) * 8; i++ )
            if ( !test_bit(i, &next_p->arch.io_bitmap_sel) )
                memcpy(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
                       &next_p->arch.io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
                       IOBMP_BYTES_PER_SELBIT);
        tss->bitmap = IOBMP_OFFSET;
    }

    set_current(next_p);

    /* Switch GDT and LDT. */
    __asm__ __volatile__ ("lgdt %0" : : "m" (*next_p->arch.gdt));
    load_LDT(next_p);

    __sti();
}

/* XXX Currently the 'domain' field is ignored! XXX */
long do_iopl(domid_t domain, unsigned int new_io_pl)
{
    execution_context_t *ec = get_execution_context();
    ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
    return 0;
}
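
/*
 * Arrange for the current hypercall to be restarted: inside a multicall we
 * simply flag the entry as preempted and stash its arguments; otherwise we
 * rewind the guest program counter over the 2-byte hypercall instruction
 * ('int 0x82' or 'syscall') and reload the argument registers, so that
 * returning to the guest re-issues the call. A caller might use it like
 * this (illustrative only; the argument names are hypothetical):
 *
 *     return __hypercall_create_continuation(
 *         __HYPERVISOR_mmu_update, 3, ureqs, count, done);
 */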
unsigned long __hypercall_create_continuation(
    unsigned int op, unsigned int nr_args, ...)
{
    struct mc_state *mcs = &mc_state[smp_processor_id()];
    execution_context_t *ec;
    unsigned int i;
    va_list args;

    va_start(args, nr_args);

    if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
    {
        __set_bit(_MCSF_call_preempted, &mcs->flags);

        for ( i = 0; i < nr_args; i++ )
            mcs->call.args[i] = va_arg(args, unsigned long);
    }
    else
    {
        ec = get_execution_context();
#if defined(__i386__)
        ec->eax  = op;
        ec->eip -= 2;  /* re-execute 'int 0x82' */

        for ( i = 0; i < nr_args; i++ )
        {
            switch ( i )
            {
            case 0: ec->ebx = va_arg(args, unsigned long); break;
            case 1: ec->ecx = va_arg(args, unsigned long); break;
            case 2: ec->edx = va_arg(args, unsigned long); break;
            case 3: ec->esi = va_arg(args, unsigned long); break;
            case 4: ec->edi = va_arg(args, unsigned long); break;
            case 5: ec->ebp = va_arg(args, unsigned long); break;
            }
        }
#elif defined(__x86_64__)
        ec->rax  = op;
        ec->rip -= 2;  /* re-execute 'syscall' */

        for ( i = 0; i < nr_args; i++ )
        {
            switch ( i )
            {
            case 0: ec->rdi = va_arg(args, unsigned long); break;
            case 1: ec->rsi = va_arg(args, unsigned long); break;
            case 2: ec->rdx = va_arg(args, unsigned long); break;
            case 3: ec->r10 = va_arg(args, unsigned long); break;
            case 4: ec->r8  = va_arg(args, unsigned long); break;
            case 5: ec->r9  = va_arg(args, unsigned long); break;
            }
        }
#endif
    }

    va_end(args);

    return op;
}
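
/*
 * Walk a page list belonging to a dying domain, dropping the reference
 * counts that keep each page allocated. Base page tables additionally have
 * their PGT_validated bit forcibly cleared (see the comment in the loop)
 * so that circular 'linear page table' references cannot keep them alive.
 */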
static void relinquish_list(struct domain *d, struct list_head *list)
{
    struct list_head *ent;
    struct pfn_info  *page;
    unsigned long     x, y;

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);

    ent = list->next;
    while ( ent != list )
    {
        page = list_entry(ent, struct pfn_info, list);

        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
        {
            /* Couldn't get a reference -- someone is freeing this page. */
            ent = ent->next;
            continue;
        }

        if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
            put_page_and_type(page);

        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
            put_page(page);

        /*
         * Forcibly invalidate base page tables at this point to break circular
         * 'linear page table' references. This is okay because MMU structures
         * are not shared across domains and this domain is now dead. Thus base
         * tables are not in use so a non-zero count means circular reference.
         */
        y = page->u.inuse.type_info;
        for ( ; ; )
        {
            x = y;
            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
                        (PGT_base_page_table|PGT_validated)) )
                break;

            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
            if ( likely(y == x) )
            {
                free_page_type(page, PGT_base_page_table);
                break;
            }
        }

        /* Follow the list chain and /then/ potentially free the page. */
        ent = ent->next;
        put_page(page);
    }

    spin_unlock_recursive(&d->page_alloc_lock);
}

#ifdef CONFIG_VMX
static void vmx_domain_relinquish_memory(struct exec_domain *ed)
{
    /*
     * Free the VMCS.
     */
    ASSERT(ed->arch.arch_vmx.vmcs);
    free_vmcs(ed->arch.arch_vmx.vmcs);
    ed->arch.arch_vmx.vmcs = 0;

    monitor_rm_pagetable(ed);
}
#endif
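
/*
 * Tear down all memory references held by a dying domain: page-table base
 * references, VMX state, GDT mappings, and finally every page on the
 * domain's Xen-heap and general page lists.
 */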
void domain_relinquish_memory(struct domain *d)
{
    struct exec_domain *ed;

    /* Ensure that no one is running over the dead domain's page tables. */
    synchronise_pagetables(~0UL);

    /* Exit shadow mode before deconstructing final guest page table. */
    shadow_mode_disable(d);

    /* Drop the in-use reference to the page-table base. */
    for_each_exec_domain ( d, ed )
    {
        if ( pagetable_val(ed->arch.pagetable) != 0 )
            put_page_and_type(&frame_table[pagetable_val(ed->arch.pagetable) >>
                                           PAGE_SHIFT]);
        ed->arch.pagetable = mk_pagetable(0);
    }

#ifdef CONFIG_VMX
    if ( VMX_DOMAIN(d->exec_domain[0]) )
        for_each_exec_domain ( d, ed )
            vmx_domain_relinquish_memory(ed);
#endif

    /*
     * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
     * it automatically gets squashed when the guest's mappings go away.
     */
    for_each_exec_domain(d, ed)
        destroy_gdt(ed);

    /* Relinquish every page of memory. */
    relinquish_list(d, &d->xenpage_list);
    relinquish_list(d, &d->page_list);
}