ia64/xen-unstable

view xen/arch/x86/hvm/hvm.c @ 11795:af1aa35265eb

[HVM] Fix resource leak in error path of AP bringup.
Based on a patch from Kouya Shimura.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Oct 12 14:28:59 2006 +0100 (2006-10-12)
parents 058f4a2a8642
children 71e2a165aa7f
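For context, the leak fixed here is in hvm_bringup_ap() near the end of the file: the vcpu_guest_context buffer obtained with xmalloc() must be freed on the error path as well as on success. Below is a condensed, hypothetical sketch of the resulting allocate/goto-cleanup shape (the name bringup_example is invented; the locking and vcpu-flag checks of the real function are omitted):

    /* Hypothetical helper illustrating the cleanup pattern; not part of hvm.c. */
    static int bringup_example(struct domain *d, int vcpuid, int trampoline_vector)
    {
        struct vcpu_guest_context *ctxt;
        int rc;

        if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
            return -ENOMEM;                 /* nothing allocated yet */

        hvm_init_ap_context(ctxt, vcpuid, trampoline_vector);

        rc = boot_vcpu(d, vcpuid, ctxt);
        if ( rc != 0 )
            goto out;                       /* error path: still free ctxt */

        vcpu_wake(d->vcpu[vcpuid]);

     out:
        xfree(ctxt);                        /* freed on success and error paths */
        return rc;
    }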
1 /*
2 * hvm.c: Common hardware virtual machine abstractions.
3 *
4 * Copyright (c) 2004, Intel Corporation.
5 * Copyright (c) 2005, International Business Machines Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18 * Place - Suite 330, Boston, MA 02111-1307 USA.
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/trace.h>
25 #include <xen/sched.h>
26 #include <xen/irq.h>
27 #include <xen/softirq.h>
28 #include <xen/domain.h>
29 #include <xen/domain_page.h>
30 #include <xen/hypercall.h>
31 #include <xen/guest_access.h>
32 #include <xen/event.h>
33 #include <xen/shadow.h>
34 #include <asm/current.h>
35 #include <asm/e820.h>
36 #include <asm/io.h>
37 #include <asm/shadow.h>
38 #include <asm/regs.h>
39 #include <asm/cpufeature.h>
40 #include <asm/processor.h>
41 #include <asm/types.h>
42 #include <asm/msr.h>
43 #include <asm/spinlock.h>
44 #include <asm/hvm/hvm.h>
45 #include <asm/hvm/support.h>
46 #include <public/sched.h>
47 #include <public/hvm/ioreq.h>
48 #include <public/version.h>
49 #include <public/memory.h>
51 int hvm_enabled = 0;
53 unsigned int opt_hvm_debug_level = 0;
54 integer_param("hvm_debug", opt_hvm_debug_level);
56 struct hvm_function_table hvm_funcs;
58 static void hvm_zap_mmio_range(
59 struct domain *d, unsigned long pfn, unsigned long nr_pfn)
60 {
61 unsigned long i;
63 ASSERT(d == current->domain);
65 for ( i = 0; i < nr_pfn; i++ )
66 {
67 if ( pfn + i >= 0xfffff )
68 break;
70 if ( VALID_MFN(gmfn_to_mfn(d, pfn + i)) )
71 guest_remove_page(d, pfn + i);
72 }
73 }
75 static void e820_zap_iommu_callback(struct domain *d,
76 struct e820entry *e,
77 void *ign)
78 {
79 if ( e->type == E820_IO )
80 hvm_zap_mmio_range(d, e->addr >> PAGE_SHIFT, e->size >> PAGE_SHIFT);
81 }
83 static void e820_foreach(struct domain *d,
84 void (*cb)(struct domain *d,
85 struct e820entry *e,
86 void *data),
87 void *data)
88 {
89 int i;
90 unsigned char e820_map_nr;
91 struct e820entry *e820entry;
92 unsigned char *p;
93 unsigned long mfn;
95 mfn = gmfn_to_mfn(d, E820_MAP_PAGE >> PAGE_SHIFT);
96 if ( mfn == INVALID_MFN )
97 {
98 printk("Can not find E820 memory map page for HVM domain.\n");
99 domain_crash_synchronous();
100 }
102 p = map_domain_page(mfn);
103 if ( p == NULL )
104 {
105 printk("Can not map E820 memory map page for HVM domain.\n");
106 domain_crash_synchronous();
107 }
109 e820_map_nr = *(p + E820_MAP_NR_OFFSET);
110 e820entry = (struct e820entry *)(p + E820_MAP_OFFSET);
112 for ( i = 0; i < e820_map_nr; i++ )
113 cb(d, e820entry + i, data);
115 unmap_domain_page(p);
116 }
118 static void hvm_zap_iommu_pages(struct domain *d)
119 {
120 e820_foreach(d, e820_zap_iommu_callback, NULL);
121 }
123 static void e820_map_io_shared_callback(struct domain *d,
124 struct e820entry *e,
125 void *data)
126 {
127 unsigned long *mfn = data;
128 if ( e->type == E820_SHARED_PAGE )
129 {
130 ASSERT(*mfn == INVALID_MFN);
131 *mfn = gmfn_to_mfn(d, e->addr >> PAGE_SHIFT);
132 }
133 }
135 static void e820_map_buffered_io_callback(struct domain *d,
136 struct e820entry *e,
137 void *data)
138 {
139 unsigned long *mfn = data;
140 if ( e->type == E820_BUFFERED_IO ) {
141 ASSERT(*mfn == INVALID_MFN);
142 *mfn = gmfn_to_mfn(d, e->addr >> PAGE_SHIFT);
143 }
144 }
146 void hvm_map_io_shared_pages(struct vcpu *v)
147 {
148 unsigned long mfn;
149 void *p;
150 struct domain *d = v->domain;
152 if ( d->arch.hvm_domain.shared_page_va ||
153 d->arch.hvm_domain.buffered_io_va )
154 return;
156 mfn = INVALID_MFN;
157 e820_foreach(d, e820_map_io_shared_callback, &mfn);
159 if ( mfn == INVALID_MFN )
160 {
161 printk("Can not find io request shared page for HVM domain.\n");
162 domain_crash_synchronous();
163 }
165 p = map_domain_page_global(mfn);
166 if ( p == NULL )
167 {
168 printk("Can not map io request shared page for HVM domain.\n");
169 domain_crash_synchronous();
170 }
172 d->arch.hvm_domain.shared_page_va = (unsigned long)p;
174 mfn = INVALID_MFN;
175 e820_foreach(d, e820_map_buffered_io_callback, &mfn);
176 if ( mfn != INVALID_MFN ) {
177 p = map_domain_page_global(mfn);
178 if ( p )
179 d->arch.hvm_domain.buffered_io_va = (unsigned long)p;
180 }
181 }
183 void hvm_create_event_channels(struct vcpu *v)
184 {
185 vcpu_iodata_t *p;
186 struct vcpu *o;
188 if ( v->vcpu_id == 0 ) {
189 /* Ugly: create event channels for every vcpu when vcpu 0
190 starts, so that they're available for ioemu to bind to. */
191 for_each_vcpu(v->domain, o) {
192 p = get_vio(v->domain, o->vcpu_id);
193 o->arch.hvm_vcpu.xen_port = p->vp_eport =
194 alloc_unbound_xen_event_channel(o, 0);
195 DPRINTK("Allocated port %d for hvm.\n", o->arch.hvm_vcpu.xen_port);
196 }
197 }
198 }
201 void hvm_stts(struct vcpu *v)
202 {
203 /* FPU state already dirty? Then no need to setup_fpu() lazily. */
204 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
205 return;
207 hvm_funcs.stts(v);
208 }
210 void hvm_set_guest_time(struct vcpu *v, u64 gtime)
211 {
212 u64 host_tsc;
214 rdtscll(host_tsc);
216 v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
217 hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
218 }
220 void hvm_do_resume(struct vcpu *v)
221 {
222 ioreq_t *p;
223 struct periodic_time *pt =
224 &v->domain->arch.hvm_domain.pl_time.periodic_tm;
226 hvm_stts(v);
228 /* pick up the elapsed PIT ticks and re-enable pit_timer */
229 if ( pt->enabled && pt->first_injected ) {
230 if ( v->arch.hvm_vcpu.guest_time ) {
231 hvm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
232 v->arch.hvm_vcpu.guest_time = 0;
233 }
234 pickup_deactive_ticks(pt);
235 }
237 p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
238 wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
239 p->state != STATE_IOREQ_READY &&
240 p->state != STATE_IOREQ_INPROCESS);
241 if ( p->state == STATE_IORESP_READY )
242 hvm_io_assist(v);
243 if ( p->state != STATE_INVALID ) {
244 printf("Weird HVM iorequest state %d.\n", p->state);
245 domain_crash(v->domain);
246 }
247 }
249 void hvm_release_assist_channel(struct vcpu *v)
250 {
251 free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);
252 }
255 void hvm_setup_platform(struct domain* d)
256 {
257 struct hvm_domain *platform;
258 struct vcpu *v = current;
260 if ( !hvm_guest(v) || (v->vcpu_id != 0) )
261 return;
263 hvm_zap_iommu_pages(d);
265 platform = &d->arch.hvm_domain;
266 pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request);
267 register_pic_io_hook();
269 if ( hvm_apic_support(d) )
270 {
271 spin_lock_init(&d->arch.hvm_domain.round_robin_lock);
272 hvm_vioapic_init(d);
273 }
275 spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
277 init_timer(&platform->pl_time.periodic_tm.timer,
278 pt_timer_fn, v, v->processor);
279 pit_init(v, cpu_khz);
280 }
282 void pic_irq_request(void *data, int level)
283 {
284 int *interrupt_request = data;
285 *interrupt_request = level;
286 }
288 void hvm_pic_assist(struct vcpu *v)
289 {
290 global_iodata_t *spg;
291 u16 *virq_line, irqs;
292 struct hvm_virpic *pic = &v->domain->arch.hvm_domain.vpic;
294 spg = &get_sp(v->domain)->sp_global;
295 virq_line = &spg->pic_clear_irr;
296 if ( *virq_line ) {
297 do {
298 irqs = *(volatile u16*)virq_line;
299 } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
300 do_pic_irqs_clear(pic, irqs);
301 }
302 virq_line = &spg->pic_irr;
303 if ( *virq_line ) {
304 do {
305 irqs = *(volatile u16*)virq_line;
306 } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
307 do_pic_irqs(pic, irqs);
308 }
309 }
311 u64 hvm_get_guest_time(struct vcpu *v)
312 {
313 u64 host_tsc;
315 rdtscll(host_tsc);
316 return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
317 }
319 int cpu_get_interrupt(struct vcpu *v, int *type)
320 {
321 int intno;
322 struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
323 unsigned long flags;
325 if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
326 /* set irq request if a PIC irq is still pending */
327 /* XXX: improve that */
328 spin_lock_irqsave(&s->lock, flags);
329 pic_update_irq(s);
330 spin_unlock_irqrestore(&s->lock, flags);
331 return intno;
332 }
333 /* read the irq from the PIC */
334 if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type)) != -1 )
335 return intno;
337 return -1;
338 }
340 static void hvm_vcpu_down(void)
341 {
342 struct vcpu *v = current;
343 struct domain *d = v->domain;
344 int online_count = 0;
346 DPRINTK("DOM%d/VCPU%d: going offline.\n", d->domain_id, v->vcpu_id);
348 /* Doesn't halt us immediately, but we'll never return to guest context. */
349 set_bit(_VCPUF_down, &v->vcpu_flags);
350 vcpu_sleep_nosync(v);
352 /* Any other VCPUs online? ... */
353 LOCK_BIGLOCK(d);
354 for_each_vcpu ( d, v )
355 if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
356 online_count++;
357 UNLOCK_BIGLOCK(d);
359 /* ... Shut down the domain if not. */
360 if ( online_count == 0 )
361 {
362 DPRINTK("DOM%d: all CPUs offline -- powering off.\n", d->domain_id);
363 domain_shutdown(d, SHUTDOWN_poweroff);
364 }
365 }
367 void hvm_hlt(unsigned long rflags)
368 {
369 struct vcpu *v = current;
370 struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
371 s_time_t next_pit = -1, next_wakeup;
373 /*
374 * If we halt with interrupts disabled, that's a pretty sure sign that we
375 * want to shut down. In a real processor, NMIs are the only way to break
376 * out of this.
377 */
378 if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
379 return hvm_vcpu_down();
381 if ( !v->vcpu_id )
382 next_pit = get_scheduled(v, pt->irq, pt);
383 next_wakeup = get_apictime_scheduled(v);
384 if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
385 next_wakeup = next_pit;
386 if ( next_wakeup != -1 )
387 set_timer(&current->arch.hvm_vcpu.hlt_timer, next_wakeup);
388 do_sched_op_compat(SCHEDOP_block, 0);
389 }
391 /*
392 * __hvm_copy():
393 * @buf = hypervisor buffer
394 * @addr = guest virtual or physical address to copy to/from
395 * @size = number of bytes to copy
396 * @dir = copy *to* guest (TRUE) or *from* guest (FALSE)?
397 * @phy = interpret addr as physical (TRUE) or virtual (FALSE) address?
398 * Returns number of bytes failed to copy (0 == complete success).
399 */
400 static int __hvm_copy(
401 void *buf, unsigned long addr, int size, int dir, int phy)
402 {
403 struct vcpu *v = current;
404 unsigned long mfn;
405 char *p;
406 int count, todo;
408 todo = size;
409 while ( todo > 0 )
410 {
411 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
413 mfn = phy ?
414 get_mfn_from_gpfn(addr >> PAGE_SHIFT) :
415 mfn_x(sh_vcpu_gfn_to_mfn(v, shadow_gva_to_gfn(v, addr)));
416 if ( mfn == INVALID_MFN )
417 return todo;
419 p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
421 if ( dir )
422 memcpy(p, buf, count); /* dir == TRUE: *to* guest */
423 else
424 memcpy(buf, p, count); /* dir == FALSE: *from* guest */
426 unmap_domain_page(p);
428 addr += count;
429 buf += count;
430 todo -= count;
431 }
433 return 0;
434 }
436 int hvm_copy_to_guest_phys(unsigned long paddr, void *buf, int size)
437 {
438 return __hvm_copy(buf, paddr, size, 1, 1);
439 }
441 int hvm_copy_from_guest_phys(void *buf, unsigned long paddr, int size)
442 {
443 return __hvm_copy(buf, paddr, size, 0, 1);
444 }
446 int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
447 {
448 return __hvm_copy(buf, vaddr, size, 1, 0);
449 }
451 int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
452 {
453 return __hvm_copy(buf, vaddr, size, 0, 0);
454 }
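/*
 * Hypothetical usage sketch for the wrappers above (not part of this file).
 * __hvm_copy() returns the number of bytes it failed to copy, so callers
 * treat any non-zero result as an error:
 *
 *     uint32_t val = 0;
 *     if ( hvm_copy_from_guest_phys(&val, gpa, sizeof(val)) != 0 )
 *         return;   // 'gpa' is an assumed guest-physical address
 */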
456 /*
457 * HVM specific printbuf. Mostly used for hvmloader chit-chat.
458 */
459 void hvm_print_line(struct vcpu *v, const char c)
460 {
461 int *index = &v->domain->arch.hvm_domain.pbuf_index;
462 char *pbuf = v->domain->arch.hvm_domain.pbuf;
464 if (*index == HVM_PBUF_SIZE-2 || c == '\n') {
465 if (*index == HVM_PBUF_SIZE-2)
466 pbuf[(*index)++] = c;
467 pbuf[*index] = '\0';
468 printk("(GUEST: %u) %s\n", v->domain->domain_id, pbuf);
469 *index = 0;
470 } else
471 pbuf[(*index)++] = c;
472 }
474 typedef unsigned long hvm_hypercall_t(
475 unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
477 #define HYPERCALL(x) \
478 [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x
479 #define HYPERCALL_COMPAT32(x) \
480 [ __HYPERVISOR_ ## x ] = (hvm_hypercall_t *) do_ ## x ## _compat32
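/*
 * For reference, HYPERCALL(memory_op) expands to the designated initialiser
 *     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *) do_memory_op
 * so each table below is indexed directly by hypercall number.
 */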
482 #if defined(__i386__)
484 static hvm_hypercall_t *hvm_hypercall_table[] = {
485 HYPERCALL(memory_op),
486 HYPERCALL(multicall),
487 HYPERCALL(xen_version),
488 HYPERCALL(event_channel_op),
489 HYPERCALL(hvm_op)
490 };
492 void hvm_do_hypercall(struct cpu_user_regs *pregs)
493 {
494 if ( unlikely(ring_3(pregs)) )
495 {
496 pregs->eax = -EPERM;
497 return;
498 }
500 if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
501 {
502 DPRINTK("HVM vcpu %d:%d did a bad hypercall %d.\n",
503 current->domain->domain_id, current->vcpu_id,
504 pregs->eax);
505 pregs->eax = -ENOSYS;
506 return;
507 }
509 pregs->eax = hvm_hypercall_table[pregs->eax](
510 pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
511 }
513 #else /* defined(__x86_64__) */
515 static long do_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
516 {
517 extern long do_add_to_physmap(struct xen_add_to_physmap *xatp);
518 long rc;
520 switch ( cmd )
521 {
522 case XENMEM_add_to_physmap:
523 {
524 struct {
525 domid_t domid;
526 uint32_t space;
527 uint32_t idx;
528 uint32_t gpfn;
529 } u;
530 struct xen_add_to_physmap h;
532 if ( copy_from_guest(&u, arg, 1) )
533 return -EFAULT;
535 h.domid = u.domid;
536 h.space = u.space;
537 h.idx = u.idx;
538 h.gpfn = u.gpfn;
540 this_cpu(guest_handles_in_xen_space) = 1;
541 rc = do_memory_op(cmd, guest_handle_from_ptr(&h, void));
542 this_cpu(guest_handles_in_xen_space) = 0;
544 break;
545 }
547 default:
548 DPRINTK("memory_op %d.\n", cmd);
549 rc = -ENOSYS;
550 break;
551 }
553 return rc;
554 }
556 static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
557 HYPERCALL(memory_op),
558 HYPERCALL(xen_version),
559 HYPERCALL(hvm_op),
560 HYPERCALL(event_channel_op)
561 };
563 static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
564 HYPERCALL_COMPAT32(memory_op),
565 HYPERCALL(xen_version),
566 HYPERCALL(hvm_op),
567 HYPERCALL(event_channel_op)
568 };
570 void hvm_do_hypercall(struct cpu_user_regs *pregs)
571 {
572 if ( unlikely(ring_3(pregs)) )
573 {
574 pregs->rax = -EPERM;
575 return;
576 }
578 pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
579 if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] )
580 {
581 DPRINTK("HVM vcpu %d:%d did a bad hypercall %ld.\n",
582 current->domain->domain_id, current->vcpu_id,
583 pregs->rax);
584 pregs->rax = -ENOSYS;
585 return;
586 }
588 if ( current->arch.shadow.mode->guest_levels == 4 )
589 {
590 pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
591 pregs->rsi,
592 pregs->rdx,
593 pregs->r10,
594 pregs->r8);
595 }
596 else
597 {
598 pregs->eax = hvm_hypercall32_table[pregs->eax]((uint32_t)pregs->ebx,
599 (uint32_t)pregs->ecx,
600 (uint32_t)pregs->edx,
601 (uint32_t)pregs->esi,
602 (uint32_t)pregs->edi);
603 }
604 }
606 #endif /* defined(__x86_64__) */
608 /* Initialise a hypercall transfer page for a VMX domain using
609 paravirtualised drivers. */
610 void hvm_hypercall_page_initialise(struct domain *d,
611 void *hypercall_page)
612 {
613 hvm_funcs.init_hypercall_page(d, hypercall_page);
614 }
617 /*
618 * only called in HVM domain BSP context
619 * when booting, vcpuid is always equal to apic_id
620 */
621 int hvm_bringup_ap(int vcpuid, int trampoline_vector)
622 {
623 struct vcpu *bsp = current, *v;
624 struct domain *d = bsp->domain;
625 struct vcpu_guest_context *ctxt;
626 int rc = 0;
628 BUG_ON(!hvm_guest(bsp));
630 if ( bsp->vcpu_id != 0 )
631 {
632 DPRINTK("Not calling hvm_bringup_ap from BSP context.\n");
633 domain_crash_synchronous();
634 }
636 if ( (v = d->vcpu[vcpuid]) == NULL )
637 return -ENOENT;
639 if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
640 {
641 DPRINTK("Failed to allocate memory in hvm_bringup_ap.\n");
642 return -ENOMEM;
643 }
645 hvm_init_ap_context(ctxt, vcpuid, trampoline_vector);
647 LOCK_BIGLOCK(d);
648 rc = -EEXIST;
649 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
650 rc = boot_vcpu(d, vcpuid, ctxt);
651 UNLOCK_BIGLOCK(d);
653 if ( rc != 0 )
654 {
655 DPRINTK("AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc);
656 goto out;
657 }
659 if ( test_and_clear_bit(_VCPUF_down, &d->vcpu[vcpuid]->vcpu_flags) )
660 vcpu_wake(d->vcpu[vcpuid]);
661 DPRINTK("AP %d bringup suceeded.\n", vcpuid);
663 out:
664 xfree(ctxt);
665 return rc;
666 }
668 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
670 {
671 long rc = 0;
673 switch ( op )
674 {
675 case HVMOP_set_param:
676 case HVMOP_get_param:
677 {
678 struct xen_hvm_param a;
679 struct domain *d;
681 if ( copy_from_guest(&a, arg, 1) )
682 return -EFAULT;
684 if ( a.index >= HVM_NR_PARAMS )
685 return -EINVAL;
687 if ( a.domid == DOMID_SELF )
688 {
689 get_knownalive_domain(current->domain);
690 d = current->domain;
691 }
692 else if ( IS_PRIV(current->domain) )
693 {
694 d = find_domain_by_id(a.domid);
695 if ( d == NULL )
696 return -ESRCH;
697 }
698 else
699 {
700 return -EPERM;
701 }
703 if ( op == HVMOP_set_param )
704 {
705 d->arch.hvm_domain.params[a.index] = a.value;
706 rc = 0;
707 }
708 else
709 {
710 a.value = d->arch.hvm_domain.params[a.index];
711 rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
712 }
714 put_domain(d);
715 break;
716 }
718 default:
719 {
720 DPRINTK("Bad HVM op %ld.\n", op);
721 rc = -ENOSYS;
722 break;
723 }
724 }
726 return rc;
727 }
729 /*
730 * Local variables:
731 * mode: C
732 * c-set-style: "BSD"
733 * c-basic-offset: 4
734 * tab-width: 4
735 * indent-tabs-mode: nil
736 * End:
737 */