ia64/xen-unstable

view xen/arch/x86/traps.c @ 14054:43e9952b07ea

x86: Better BUG() and ASSERT() logging.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Feb 21 14:40:37 2007 +0000 (2007-02-21)
parents bca284f67702
children 40a6e2280d7b
1 /******************************************************************************
2 * arch/x86/traps.c
3 *
4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Copyright (C) 1991, 1992 Linus Torvalds
23 *
24 * Pentium III FXSR, SSE support
25 * Gareth Hughes <gareth@valinux.com>, May 2000
26 */
28 #include <xen/config.h>
29 #include <xen/init.h>
30 #include <xen/sched.h>
31 #include <xen/lib.h>
32 #include <xen/errno.h>
33 #include <xen/mm.h>
34 #include <xen/console.h>
35 #include <xen/shutdown.h>
36 #include <asm/regs.h>
37 #include <xen/delay.h>
38 #include <xen/event.h>
39 #include <xen/spinlock.h>
40 #include <xen/irq.h>
41 #include <xen/perfc.h>
42 #include <xen/softirq.h>
43 #include <xen/domain_page.h>
44 #include <xen/symbols.h>
45 #include <xen/iocap.h>
46 #include <xen/nmi.h>
47 #include <xen/version.h>
48 #include <xen/kexec.h>
49 #include <asm/paging.h>
50 #include <asm/system.h>
51 #include <asm/io.h>
52 #include <asm/atomic.h>
53 #include <asm/desc.h>
54 #include <asm/debugreg.h>
55 #include <asm/smp.h>
56 #include <asm/flushtlb.h>
57 #include <asm/uaccess.h>
58 #include <asm/i387.h>
59 #include <asm/debugger.h>
60 #include <asm/msr.h>
61 #include <asm/shared.h>
62 #include <asm/x86_emulate.h>
63 #include <asm/hvm/vpt.h>
65 /*
66 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
67 * fatal: Xen prints diagnostic message and then hangs.
68 * dom0: The NMI is virtualised to DOM0.
69 * ignore: The NMI error is cleared and ignored.
70 */
71 #ifdef NDEBUG
72 char opt_nmi[10] = "dom0";
73 #else
74 char opt_nmi[10] = "fatal";
75 #endif
76 string_param("nmi", opt_nmi);
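/*
 * For example, booting Xen with "nmi=ignore" on its command line overrides
 * the default above. The NMI handlers later in this file switch on
 * opt_nmi[0] to decide what to do: 'd' reports the NMI to dom0 and
 * continues, 'i' ignores it, anything else is treated as fatal.
 */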
78 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
79 idt_entry_t idt_table[IDT_ENTRIES];
81 #define DECLARE_TRAP_HANDLER(_name) \
82 asmlinkage void _name(void); \
83 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
85 asmlinkage void nmi(void);
86 DECLARE_TRAP_HANDLER(divide_error);
87 DECLARE_TRAP_HANDLER(debug);
88 DECLARE_TRAP_HANDLER(int3);
89 DECLARE_TRAP_HANDLER(overflow);
90 DECLARE_TRAP_HANDLER(bounds);
91 DECLARE_TRAP_HANDLER(invalid_op);
92 DECLARE_TRAP_HANDLER(device_not_available);
93 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
94 DECLARE_TRAP_HANDLER(invalid_TSS);
95 DECLARE_TRAP_HANDLER(segment_not_present);
96 DECLARE_TRAP_HANDLER(stack_segment);
97 DECLARE_TRAP_HANDLER(general_protection);
98 DECLARE_TRAP_HANDLER(page_fault);
99 DECLARE_TRAP_HANDLER(coprocessor_error);
100 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
101 DECLARE_TRAP_HANDLER(alignment_check);
102 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
103 DECLARE_TRAP_HANDLER(machine_check);
105 long do_set_debugreg(int reg, unsigned long value);
106 unsigned long do_get_debugreg(int reg);
108 static int debug_stack_lines = 20;
109 integer_param("debug_stack_lines", debug_stack_lines);
111 #ifdef CONFIG_X86_32
112 #define stack_words_per_line 8
113 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
114 #else
115 #define stack_words_per_line 4
116 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
117 #endif
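/*
 * ESP_BEFORE_EXCEPTION() recovers the stack pointer as it was when the
 * exception was taken: on x86/64 it comes from the saved %rsp, while on
 * x86/32 same-privilege faults push no %esp, so the address of the esp slot
 * itself marks the top of the interrupted stack. stack_words_per_line keeps
 * each printed row of the dump at 32 bytes on both subarchitectures.
 */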
119 static void show_guest_stack(struct cpu_user_regs *regs)
120 {
121 int i;
122 unsigned long *stack, addr;
124 if ( is_hvm_vcpu(current) )
125 return;
127 if ( IS_COMPAT(container_of(regs, struct cpu_info, guest_cpu_user_regs)->current_vcpu->domain) )
128 {
129 compat_show_guest_stack(regs, debug_stack_lines);
130 return;
131 }
133 if ( vm86_mode(regs) )
134 {
135 stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
136 printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
137 regs->ss, (uint16_t)(regs->esp & 0xffff));
138 }
139 else
140 {
141 stack = (unsigned long *)regs->esp;
142 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
143 }
145 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
146 {
147 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
148 break;
149 if ( get_user(addr, stack) )
150 {
151 if ( i != 0 )
152 printk("\n ");
153 printk("Fault while accessing guest memory.");
154 i = 1;
155 break;
156 }
157 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
158 printk("\n ");
159 printk(" %p", _p(addr));
160 stack++;
161 }
162 if ( i == 0 )
163 printk("Stack empty.");
164 printk("\n");
165 }
167 #ifdef NDEBUG
169 static void show_trace(struct cpu_user_regs *regs)
170 {
171 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
173 printk("Xen call trace:\n ");
175 printk("[<%p>]", _p(regs->eip));
176 print_symbol(" %s\n ", regs->eip);
178 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
179 {
180 addr = *stack++;
181 if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
182 {
183 printk("[<%p>]", _p(addr));
184 print_symbol(" %s\n ", addr);
185 }
186 }
188 printk("\n");
189 }
191 #else
193 static void show_trace(struct cpu_user_regs *regs)
194 {
195 unsigned long *frame, next, addr, low, high;
197 printk("Xen call trace:\n ");
199 printk("[<%p>]", _p(regs->eip));
200 print_symbol(" %s\n ", regs->eip);
202 /* Bounds for range of valid frame pointer. */
203 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
204 high = (low & ~(STACK_SIZE - 1)) +
205 (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
207 /* The initial frame pointer. */
208 next = regs->ebp;
210 for ( ; ; )
211 {
212 /* Valid frame pointer? */
213 if ( (next < low) || (next >= high) )
214 {
215 /*
216 * Exception stack frames have a different layout, denoted by an
217 * inverted frame pointer.
218 */
219 next = ~next;
220 if ( (next < low) || (next >= high) )
221 break;
222 frame = (unsigned long *)next;
223 next = frame[0];
224 addr = frame[(offsetof(struct cpu_user_regs, eip) -
225 offsetof(struct cpu_user_regs, ebp))
226 / BYTES_PER_LONG];
227 }
228 else
229 {
230 /* Ordinary stack frame. */
231 frame = (unsigned long *)next;
232 next = frame[0];
233 addr = frame[1];
234 }
236 printk("[<%p>]", _p(addr));
237 print_symbol(" %s\n ", addr);
239 low = (unsigned long)&frame[2];
240 }
242 printk("\n");
243 }
245 #endif
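/*
 * Note: the NDEBUG (release) variant of show_trace() above scans raw stack
 * words for addresses that fall within Xen's text sections, whereas the
 * debug variant walks %ebp frame pointers, treating an inverted pointer
 * value as the marker of an exception frame.
 */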
247 void show_stack(struct cpu_user_regs *regs)
248 {
249 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
250 int i;
252 if ( guest_mode(regs) )
253 return show_guest_stack(regs);
255 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
257 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
258 {
259 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
260 break;
261 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
262 printk("\n ");
263 addr = *stack++;
264 printk(" %p", _p(addr));
265 }
266 if ( i == 0 )
267 printk("Stack empty.");
268 printk("\n");
270 show_trace(regs);
271 }
273 void show_xen_trace(void)
274 {
275 struct cpu_user_regs regs;
276 #ifdef __x86_64__
277 __asm__("movq %%rsp,%0" : "=m" (regs.rsp));
278 __asm__("movq %%rbp,%0" : "=m" (regs.rbp));
279 __asm__("leaq 0(%%rip),%0" : "=a" (regs.rip));
280 #else
281 __asm__("movl %%esp,%0" : "=m" (regs.esp));
282 __asm__("movl %%ebp,%0" : "=m" (regs.ebp));
283 __asm__("call 1f; 1: popl %0" : "=a" (regs.eip));
284 #endif
285 show_trace(&regs);
286 }
288 void show_stack_overflow(unsigned long esp)
289 {
290 #ifdef MEMORY_GUARD
291 unsigned long esp_top;
292 unsigned long *stack, addr;
294 esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
296 /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
297 if ( ((unsigned long)(esp - esp_top) > 512) &&
298 ((unsigned long)(esp_top - esp) > 512) )
299 return;
301 if ( esp < esp_top )
302 esp = esp_top;
304 printk("Xen stack overflow:\n ");
306 stack = (unsigned long *)esp;
307 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
308 {
309 addr = *stack++;
310 if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
311 {
312 printk("%p: [<%p>]", stack, _p(addr));
313 print_symbol(" %s\n ", addr);
314 }
315 }
317 printk("\n");
318 #endif
319 }
321 void show_execution_state(struct cpu_user_regs *regs)
322 {
323 show_registers(regs);
324 show_stack(regs);
325 }
327 char *trapstr(int trapnr)
328 {
329 static char *strings[] = {
330 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
331 "invalid opcode", "device not available", "double fault",
332 "coprocessor segment", "invalid tss", "segment not found",
333 "stack error", "general protection fault", "page fault",
334 "spurious interrupt", "coprocessor error", "alignment check",
335 "machine check", "simd error"
336 };
338 if ( (trapnr < 0) || (trapnr >= ARRAY_SIZE(strings)) )
339 return "???";
341 return strings[trapnr];
342 }
344 /*
345 * This is called for faults at very unexpected times (e.g., when interrupts
346 * are disabled). In such situations we can't do much that is safe. We try to
347 * print out some tracing and then we just spin.
348 */
349 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
350 {
351 watchdog_disable();
352 console_start_sync();
354 show_execution_state(regs);
356 if ( trapnr == TRAP_page_fault )
357 {
358 unsigned long cr2 = read_cr2();
359 printk("Faulting linear address: %p\n", _p(cr2));
360 show_page_walk(cr2);
361 }
363 panic("FATAL TRAP: vector = %d (%s)\n"
364 "[error_code=%04x] %s\n",
365 trapnr, trapstr(trapnr), regs->error_code,
366 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
367 }
369 static int do_guest_trap(
370 int trapnr, const struct cpu_user_regs *regs, int use_error_code)
371 {
372 struct vcpu *v = current;
373 struct trap_bounce *tb;
374 const struct trap_info *ti;
376 tb = &v->arch.trap_bounce;
377 ti = &v->arch.guest_context.trap_ctxt[trapnr];
379 tb->flags = TBF_EXCEPTION;
380 tb->cs = ti->cs;
381 tb->eip = ti->address;
383 if ( use_error_code )
384 {
385 tb->flags |= TBF_EXCEPTION_ERRCODE;
386 tb->error_code = regs->error_code;
387 }
389 if ( TI_GET_IF(ti) )
390 tb->flags |= TBF_INTERRUPT;
392 if ( unlikely(null_trap_bounce(v, tb)) )
393 gdprintk(XENLOG_WARNING, "Unhandled %s fault/trap [#%d] in "
394 "domain %d on VCPU %d [ec=%04x]\n",
395 trapstr(trapnr), trapnr, v->domain->domain_id, v->vcpu_id,
396 regs->error_code);
398 return 0;
399 }
401 static inline int do_trap(
402 int trapnr, struct cpu_user_regs *regs, int use_error_code)
403 {
404 unsigned long fixup;
406 DEBUGGER_trap_entry(trapnr, regs);
408 if ( guest_mode(regs) )
409 return do_guest_trap(trapnr, regs, use_error_code);
411 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
412 {
413 dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n",
414 trapnr, _p(regs->eip), _p(fixup));
415 regs->eip = fixup;
416 return 0;
417 }
419 DEBUGGER_trap_fatal(trapnr, regs);
421 show_execution_state(regs);
422 panic("FATAL TRAP: vector = %d (%s)\n"
423 "[error_code=%04x]\n",
424 trapnr, trapstr(trapnr), regs->error_code);
425 return 0;
426 }
428 #define DO_ERROR_NOCODE(trapnr, name) \
429 asmlinkage int do_##name(struct cpu_user_regs *regs) \
430 { \
431 return do_trap(trapnr, regs, 0); \
432 }
434 #define DO_ERROR(trapnr, name) \
435 asmlinkage int do_##name(struct cpu_user_regs *regs) \
436 { \
437 return do_trap(trapnr, regs, 1); \
438 }
440 DO_ERROR_NOCODE(TRAP_divide_error, divide_error)
441 DO_ERROR_NOCODE(TRAP_overflow, overflow)
442 DO_ERROR_NOCODE(TRAP_bounds, bounds)
443 DO_ERROR_NOCODE(TRAP_copro_seg, coprocessor_segment_overrun)
444 DO_ERROR( TRAP_invalid_tss, invalid_TSS)
445 DO_ERROR( TRAP_no_segment, segment_not_present)
446 DO_ERROR( TRAP_stack_error, stack_segment)
447 DO_ERROR_NOCODE(TRAP_copro_error, coprocessor_error)
448 DO_ERROR( TRAP_alignment_check, alignment_check)
449 DO_ERROR_NOCODE(TRAP_simd_error, simd_coprocessor_error)
451 int rdmsr_hypervisor_regs(
452 uint32_t idx, uint32_t *eax, uint32_t *edx)
453 {
454 idx -= 0x40000000;
455 if ( idx > 0 )
456 return 0;
458 *eax = *edx = 0;
459 return 1;
460 }
462 int wrmsr_hypervisor_regs(
463 uint32_t idx, uint32_t eax, uint32_t edx)
464 {
465 struct domain *d = current->domain;
467 idx -= 0x40000000;
468 if ( idx > 0 )
469 return 0;
471 switch ( idx )
472 {
473 case 0:
474 {
475 void *hypercall_page;
476 unsigned long mfn;
477 unsigned long gmfn = ((unsigned long)edx << 20) | (eax >> 12);
478 unsigned int idx = eax & 0xfff;
480 if ( idx > 0 )
481 {
482 gdprintk(XENLOG_WARNING,
483 "Dom%d: Out of range index %u to MSR %08x\n",
484 d->domain_id, idx, 0x40000000);
485 return 0;
486 }
488 mfn = gmfn_to_mfn(d, gmfn);
490 if ( !mfn_valid(mfn) ||
491 !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
492 {
493 gdprintk(XENLOG_WARNING,
494 "Dom%d: Bad GMFN %lx (MFN %lx) to MSR %08x\n",
495 d->domain_id, gmfn, mfn, 0x40000000);
496 return 0;
497 }
499 hypercall_page = map_domain_page(mfn);
500 hypercall_page_initialise(d, hypercall_page);
501 unmap_domain_page(hypercall_page);
503 put_page_and_type(mfn_to_page(mfn));
504 break;
505 }
507 default:
508 BUG();
509 }
511 return 1;
512 }
514 int cpuid_hypervisor_leaves(
515 uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
516 {
517 idx -= 0x40000000;
518 if ( idx > 2 )
519 return 0;
521 switch ( idx )
522 {
523 case 0:
524 *eax = 0x40000002; /* Largest leaf */
525 *ebx = 0x566e6558; /* Signature 1: "XenV" */
526 *ecx = 0x65584d4d; /* Signature 2: "MMXe" */
527 *edx = 0x4d4d566e; /* Signature 3: "nVMM" */
528 break;
530 case 1:
531 *eax = (xen_major_version() << 16) | xen_minor_version();
532 *ebx = 0; /* Reserved */
533 *ecx = 0; /* Reserved */
534 *edx = 0; /* Reserved */
535 break;
537 case 2:
538 *eax = 1; /* Number of hypercall-transfer pages */
539 *ebx = 0x40000000; /* MSR base address */
540 *ecx = 0; /* Features 1 */
541 *edx = 0; /* Features 2 */
542 break;
544 default:
545 BUG();
546 }
548 return 1;
549 }
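/*
 * The three signature words returned for leaf 0x40000000, read in
 * ebx/ecx/edx order, spell the ASCII string "XenVMMXenVMM"; guests use this
 * to detect via CPUID that they are running on Xen.
 */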
551 static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
552 {
553 char sig[5], instr[2];
554 uint32_t a, b, c, d;
555 unsigned long eip, rc;
557 a = regs->eax;
558 b = regs->ebx;
559 c = regs->ecx;
560 d = regs->edx;
561 eip = regs->eip;
563 /* Check for forced emulation signature: ud2 ; .ascii "xen". */
564 if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
565 {
566 propagate_page_fault(eip + sizeof(sig) - rc, 0);
567 return EXCRET_fault_fixed;
568 }
569 if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
570 return 0;
571 eip += sizeof(sig);
573 /* We only emulate CPUID. */
574 if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
575 {
576 propagate_page_fault(eip + sizeof(instr) - rc, 0);
577 return EXCRET_fault_fixed;
578 }
579 if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
580 return 0;
581 eip += sizeof(instr);
583 __asm__ (
584 "cpuid"
585 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
586 : "0" (a), "1" (b), "2" (c), "3" (d) );
588 if ( regs->eax == 1 )
589 {
590 /* Modify Feature Information. */
591 clear_bit(X86_FEATURE_VME, &d);
592 clear_bit(X86_FEATURE_DE, &d);
593 clear_bit(X86_FEATURE_PSE, &d);
594 clear_bit(X86_FEATURE_PGE, &d);
595 if ( !supervisor_mode_kernel )
596 clear_bit(X86_FEATURE_SEP, &d);
597 if ( !IS_PRIV(current->domain) )
598 clear_bit(X86_FEATURE_MTRR, &d);
599 }
600 else if ( regs->eax == 0x80000001 )
601 {
602 /* Modify Feature Information. */
603 clear_bit(X86_FEATURE_RDTSCP % 32, &d);
604 }
605 else
606 {
607 (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d);
608 }
610 regs->eax = a;
611 regs->ebx = b;
612 regs->ecx = c;
613 regs->edx = d;
614 regs->eip = eip;
616 return EXCRET_fault_fixed;
617 }
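/*
 * The byte sequence recognised above is the PV "forced emulation" interface:
 * a guest executes ud2a (0f 0b), the literal bytes "xen", then cpuid
 * (0f a2), and receives CPUID output with the feature bits Xen wants hidden
 * (VME, DE, PSE, PGE, and conditionally SEP/MTRR) already masked out.
 */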
619 asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
620 {
621 struct bug_frame bug;
622 struct bug_frame_str bug_str;
623 char *filename, *predicate, *eip = (char *)regs->eip;
624 int rc, id, lineno;
626 DEBUGGER_trap_entry(TRAP_invalid_op, regs);
628 if ( likely(guest_mode(regs)) )
629 {
630 if ( (rc = emulate_forced_invalid_op(regs)) != 0 )
631 return rc;
632 return do_guest_trap(TRAP_invalid_op, regs, 0);
633 }
635 if ( !is_kernel(eip) ||
636 __copy_from_user(&bug, eip, sizeof(bug)) ||
637 memcmp(bug.ud2, "\xf\xb", sizeof(bug.ud2)) ||
638 (bug.ret != 0xc2) )
639 goto die;
641 id = bug.id & 3;
642 if ( id == BUGFRAME_rsvd )
643 goto die;
645 if ( id == BUGFRAME_dump )
646 {
647 show_execution_state(regs);
648 regs->eip += sizeof(bug);
649 return EXCRET_fault_fixed;
650 }
652 /* BUG() or ASSERT(): decode the filename pointer and line number. */
653 ASSERT((id == BUGFRAME_bug) || (id == BUGFRAME_assert));
654 eip += sizeof(bug);
655 if ( !is_kernel(eip) ||
656 __copy_from_user(&bug_str, eip, sizeof(bug_str)) ||
657 memcmp(bug_str.mov, BUG_MOV_STR, sizeof(bug_str.mov)) )
658 goto die;
660 filename = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
661 lineno = bug.id >> 2;
663 if ( id == BUGFRAME_bug )
664 {
665 printk("Xen BUG at %.50s:%d\n", filename, lineno);
666 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
667 show_execution_state(regs);
668 panic("Xen BUG at %.50s:%d\n", filename, lineno);
669 }
671 /* ASSERT(): decode the predicate string pointer. */
672 ASSERT(id == BUGFRAME_assert);
673 eip += sizeof(bug_str);
674 if ( !is_kernel(eip) ||
675 __copy_from_user(&bug_str, eip, sizeof(bug_str)) ||
676 memcmp(bug_str.mov, BUG_MOV_STR, sizeof(bug_str.mov)) )
677 goto die;
679 predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
680 printk("Assertion '%s' failed at %.50s:%d\n",
681 predicate, filename, lineno);
682 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
683 show_execution_state(regs);
684 panic("Assertion '%s' failed at %.50s:%d\n",
685 predicate, filename, lineno);
687 die:
688 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
689 show_execution_state(regs);
690 panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
691 return 0;
692 }
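/*
 * This decoding is the "Better BUG() and ASSERT() logging" referred to in
 * the changeset description: BUG() and ASSERT() compile down to a ud2
 * instruction followed by a small bug_frame, from which the handler above
 * recovers the filename and line number (and, for ASSERT, the failed
 * predicate string) before panicking; the BUGFRAME_dump variant merely
 * prints the execution state and continues.
 */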
694 asmlinkage int do_int3(struct cpu_user_regs *regs)
695 {
696 DEBUGGER_trap_entry(TRAP_int3, regs);
698 if ( !guest_mode(regs) )
699 {
700 DEBUGGER_trap_fatal(TRAP_int3, regs);
701 show_execution_state(regs);
702 panic("FATAL TRAP: vector = 3 (Int3)\n");
703 }
705 return do_guest_trap(TRAP_int3, regs, 0);
706 }
708 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
709 {
710 fatal_trap(TRAP_machine_check, regs);
711 return 0;
712 }
714 void propagate_page_fault(unsigned long addr, u16 error_code)
715 {
716 struct trap_info *ti;
717 struct vcpu *v = current;
718 struct trap_bounce *tb = &v->arch.trap_bounce;
720 v->arch.guest_context.ctrlreg[2] = addr;
721 arch_set_cr2(v, addr);
723 /* Re-set error_code.user flag appropriately for the guest. */
724 error_code &= ~PFEC_user_mode;
725 if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
726 error_code |= PFEC_user_mode;
728 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
729 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
730 tb->error_code = error_code;
731 tb->cs = ti->cs;
732 tb->eip = ti->address;
733 if ( TI_GET_IF(ti) )
734 tb->flags |= TBF_INTERRUPT;
735 if ( unlikely(null_trap_bounce(v, tb)) )
736 {
737 printk("Unhandled page fault in domain %d on VCPU %d (ec=%04X)\n",
738 v->domain->domain_id, v->vcpu_id, error_code);
739 show_page_walk(addr);
740 }
741 }
743 static int handle_gdt_ldt_mapping_fault(
744 unsigned long offset, struct cpu_user_regs *regs)
745 {
746 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
747 unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
748 unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
750 /* Should never fault in another vcpu's area. */
751 BUG_ON(vcpu_area != current->vcpu_id);
753 /* Byte offset within the gdt/ldt sub-area. */
754 offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
756 if ( likely(is_ldt_area) )
757 {
758 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
759 if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
760 {
761 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
762 if ( !guest_mode(regs) )
763 return 0;
764 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
765 propagate_page_fault(
766 current->arch.guest_context.ldt_base + offset,
767 regs->error_code);
768 }
769 }
770 else
771 {
772 /* GDT fault: handle the fault as #GP(selector). */
773 regs->error_code = (u16)offset & ~7;
774 (void)do_general_protection(regs);
775 }
777 return EXCRET_fault_fixed;
778 }
780 #ifdef HYPERVISOR_VIRT_END
781 #define IN_HYPERVISOR_RANGE(va) \
782 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
783 #else
784 #define IN_HYPERVISOR_RANGE(va) \
785 (((va) >= HYPERVISOR_VIRT_START))
786 #endif
788 static int __spurious_page_fault(
789 unsigned long addr, struct cpu_user_regs *regs)
790 {
791 unsigned long mfn, cr3 = read_cr3();
792 #if CONFIG_PAGING_LEVELS >= 4
793 l4_pgentry_t l4e, *l4t;
794 #endif
795 #if CONFIG_PAGING_LEVELS >= 3
796 l3_pgentry_t l3e, *l3t;
797 #endif
798 l2_pgentry_t l2e, *l2t;
799 l1_pgentry_t l1e, *l1t;
800 unsigned int required_flags, disallowed_flags;
802 /* Reserved bit violations are never spurious faults. */
803 if ( regs->error_code & PFEC_reserved_bit )
804 return 0;
806 required_flags = _PAGE_PRESENT;
807 if ( regs->error_code & PFEC_write_access )
808 required_flags |= _PAGE_RW;
809 if ( regs->error_code & PFEC_user_mode )
810 required_flags |= _PAGE_USER;
812 disallowed_flags = 0;
813 if ( regs->error_code & PFEC_insn_fetch )
814 disallowed_flags |= _PAGE_NX;
816 mfn = cr3 >> PAGE_SHIFT;
818 #if CONFIG_PAGING_LEVELS >= 4
819 l4t = map_domain_page(mfn);
820 l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
821 mfn = l4e_get_pfn(l4e);
822 unmap_domain_page(l4t);
823 if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
824 (l4e_get_flags(l4e) & disallowed_flags) )
825 return 0;
826 #endif
828 #if CONFIG_PAGING_LEVELS >= 3
829 l3t = map_domain_page(mfn);
830 #ifdef CONFIG_X86_PAE
831 l3t += (cr3 & 0xFE0UL) >> 3;
832 #endif
833 l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
834 mfn = l3e_get_pfn(l3e);
835 unmap_domain_page(l3t);
836 #ifdef CONFIG_X86_PAE
837 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
838 return 0;
839 #else
840 if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
841 (l3e_get_flags(l3e) & disallowed_flags) )
842 return 0;
843 #endif
844 #endif
846 l2t = map_domain_page(mfn);
847 l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
848 mfn = l2e_get_pfn(l2e);
849 unmap_domain_page(l2t);
850 if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
851 (l2e_get_flags(l2e) & disallowed_flags) )
852 return 0;
853 if ( l2e_get_flags(l2e) & _PAGE_PSE )
854 {
855 l1e = l1e_empty(); /* define before use in debug tracing */
856 goto spurious;
857 }
859 l1t = map_domain_page(mfn);
860 l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
861 mfn = l1e_get_pfn(l1e);
862 unmap_domain_page(l1t);
863 if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
864 (l1e_get_flags(l1e) & disallowed_flags) )
865 return 0;
867 spurious:
868 dprintk(XENLOG_WARNING, "Spurious fault in domain %u:%u "
869 "at addr %lx, e/c %04x\n",
870 current->domain->domain_id, current->vcpu_id,
871 addr, regs->error_code);
872 #if CONFIG_PAGING_LEVELS >= 4
873 dprintk(XENLOG_WARNING, " l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
874 #endif
875 #if CONFIG_PAGING_LEVELS >= 3
876 dprintk(XENLOG_WARNING, " l3e = %"PRIpte"\n", l3e_get_intpte(l3e));
877 #endif
878 dprintk(XENLOG_WARNING, " l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
879 dprintk(XENLOG_WARNING, " l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
880 #ifndef NDEBUG
881 show_registers(regs);
882 #endif
883 return 1;
884 }
886 static int spurious_page_fault(
887 unsigned long addr, struct cpu_user_regs *regs)
888 {
889 unsigned long flags;
890 int is_spurious;
892 /*
893 * Disabling interrupts prevents TLB flushing, and hence prevents
894 * page tables from becoming invalid under our feet during the walk.
895 */
896 local_irq_save(flags);
897 is_spurious = __spurious_page_fault(addr, regs);
898 local_irq_restore(flags);
900 return is_spurious;
901 }
903 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
904 {
905 struct vcpu *v = current;
906 struct domain *d = v->domain;
908 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
909 {
910 if ( paging_mode_external(d) && guest_mode(regs) )
911 return paging_fault(addr, regs);
912 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
913 return handle_gdt_ldt_mapping_fault(
914 addr - GDT_LDT_VIRT_START, regs);
915 return 0;
916 }
918 ASSERT(!in_irq());
919 ASSERT(regs->eflags & X86_EFLAGS_IF);
921 if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
922 guest_kernel_mode(v, regs) &&
923 /* Do not check if access-protection fault since the page may
924 legitimately be not present in shadow page tables */
925 ((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
926 ptwr_do_page_fault(v, addr, regs) )
927 return EXCRET_fault_fixed;
929 if ( paging_mode_enabled(d) )
930 return paging_fault(addr, regs);
932 return 0;
933 }
935 /*
936 * #PF error code:
937 * Bit 0: Protection violation (=1) ; Page not present (=0)
938 * Bit 1: Write access
939 * Bit 2: User mode (=1) ; Supervisor mode (=0)
940 * Bit 3: Reserved bit violation
941 * Bit 4: Instruction fetch
942 */
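/*
 * These bits correspond to the PFEC_* flags (PFEC_write_access,
 * PFEC_user_mode, PFEC_reserved_bit, PFEC_insn_fetch) tested by the
 * spurious-fault walker and by propagate_page_fault() above.
 */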
943 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
944 {
945 unsigned long addr, fixup;
946 int rc;
948 addr = read_cr2();
950 DEBUGGER_trap_entry(TRAP_page_fault, regs);
952 perfc_incrc(page_faults);
954 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
955 return rc;
957 if ( unlikely(!guest_mode(regs)) )
958 {
959 if ( spurious_page_fault(addr, regs) )
960 return EXCRET_not_a_fault;
962 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
963 {
964 perfc_incrc(copy_user_faults);
965 regs->eip = fixup;
966 return 0;
967 }
969 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
971 show_execution_state(regs);
972 show_page_walk(addr);
973 panic("FATAL PAGE FAULT\n"
974 "[error_code=%04x]\n"
975 "Faulting linear address: %p\n",
976 regs->error_code, _p(addr));
977 }
979 propagate_page_fault(addr, regs->error_code);
980 return 0;
981 }
983 /*
984 * Early handler to deal with spurious page faults. For example, consider a
985 * routine that uses a mapping immediately after installing it (making it
986 * present). The CPU may speculatively execute the memory access before
987 * executing the PTE write. The instruction will then be marked to cause a
988 * page fault when it is retired, despite the fact that the PTE is present and
989 * correct at that point in time.
990 */
991 asmlinkage int do_early_page_fault(struct cpu_user_regs *regs)
992 {
993 static int stuck;
994 static unsigned long prev_eip, prev_cr2;
995 unsigned long cr2 = read_cr2();
997 BUG_ON(smp_processor_id() != 0);
999 if ( (regs->eip != prev_eip) || (cr2 != prev_cr2) )
1000 {
1001 prev_eip = regs->eip;
1002 prev_cr2 = cr2;
1003 stuck = 0;
1004 return EXCRET_not_a_fault;
1005 }
1007 if ( stuck++ == 1000 )
1008 panic("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n",
1009 regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
1011 return EXCRET_not_a_fault;
1012 }
1014 long do_fpu_taskswitch(int set)
1015 {
1016 struct vcpu *v = current;
1018 if ( set )
1019 {
1020 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
1021 stts();
1022 }
1023 else
1024 {
1025 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1026 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
1027 clts();
1028 }
1030 return 0;
1031 }
1033 static int read_descriptor(unsigned int sel,
1034 const struct vcpu *v,
1035 const struct cpu_user_regs * regs,
1036 unsigned long *base,
1037 unsigned long *limit,
1038 unsigned int *ar,
1039 unsigned int vm86attr)
1041 struct desc_struct desc;
1043 if ( !vm86_mode(regs) )
1045 if ( sel < 4)
1046 desc.b = desc.a = 0;
1047 else if ( __get_user(desc,
1048 (const struct desc_struct *)(!(sel & 4)
1049 ? GDT_VIRT_START(v)
1050 : LDT_VIRT_START(v))
1051 + (sel >> 3)) )
1052 return 0;
1053 if ( !(vm86attr & _SEGMENT_CODE) )
1054 desc.b &= ~_SEGMENT_L;
1056 else
1058 desc.a = (sel << 20) | 0xffff;
1059 desc.b = vm86attr | (sel >> 12);
1062 *ar = desc.b & 0x00f0ff00;
1063 if ( !(desc.b & _SEGMENT_L) )
1065 *base = (desc.a >> 16) + ((desc.b & 0xff) << 16) + (desc.b & 0xff000000);
1066 *limit = (desc.a & 0xffff) | (desc.b & 0x000f0000);
1067 if ( desc.b & _SEGMENT_G )
1068 *limit = ((*limit + 1) << 12) - 1;
1069 #ifndef NDEBUG
1070 if ( !vm86_mode(regs) && sel > 3 )
1072 unsigned int a, l;
1073 unsigned char valid;
1075 __asm__("larl %2, %0\n\tsetz %1" : "=r" (a), "=rm" (valid) : "rm" (sel));
1076 BUG_ON(valid && (a & 0x00f0ff00) != *ar);
1077 __asm__("lsll %2, %0\n\tsetz %1" : "=r" (l), "=rm" (valid) : "rm" (sel));
1078 BUG_ON(valid && l != *limit);
1080 #endif
1082 else
1084 *base = 0UL;
1085 *limit = ~0UL;
1088 return 1;
1091 /* Has the guest requested sufficient permission for this I/O access? */
1092 static inline int guest_io_okay(
1093 unsigned int port, unsigned int bytes,
1094 struct vcpu *v, struct cpu_user_regs *regs)
1096 #if defined(__x86_64__)
1097 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
1098 int user_mode = !(v->arch.flags & TF_kernel_mode);
1099 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
1100 #elif defined(__i386__)
1101 #define TOGGLE_MODE() ((void)0)
1102 #endif
1104 if ( !vm86_mode(regs) &&
1105 (v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
1106 return 1;
1108 if ( v->arch.iobmp_limit > (port + bytes) )
1110 union { uint8_t bytes[2]; uint16_t mask; } x;
1112 /*
1113 * Grab permission bytes from guest space. Inaccessible bytes are
1114 * read as 0xff (no access allowed).
1115 */
1116 TOGGLE_MODE();
1117 switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
1118 port>>3, 2) )
1120 default: x.bytes[0] = ~0;
1121 case 1: x.bytes[1] = ~0;
1122 case 0: break;
1124 TOGGLE_MODE();
1126 if ( (x.mask & (((1<<bytes)-1) << (port&7))) == 0 )
1127 return 1;
1130 return 0;
1133 /* Has the administrator granted sufficient permission for this I/O access? */
1134 static inline int admin_io_okay(
1135 unsigned int port, unsigned int bytes,
1136 struct vcpu *v, struct cpu_user_regs *regs)
1138 return ioports_access_permitted(v->domain, port, port + bytes - 1);
1141 #define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
1142 #define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
1143 #define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
1144 #define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
1145 #define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
1146 #define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
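/*
 * Note that the guest_in*_okay/guest_out*_okay checks above are simply
 * admin_io_okay(): actually touching a hardware port also requires that the
 * administrator granted the domain access to it, independently of the
 * guest-visible IOPL/bitmap check performed by guest_io_okay().
 */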
1148 /* I/O emulation support. Helper routines for, and type of, the stack stub.*/
1149 void host_to_guest_gpr_switch(struct cpu_user_regs *)
1150 __attribute__((__regparm__(1)));
1151 unsigned long guest_to_host_gpr_switch(unsigned long)
1152 __attribute__((__regparm__(1)));
1154 /* Instruction fetch with error handling. */
1155 #define insn_fetch(type, base, eip, limit) \
1156 ({ unsigned long _rc, _ptr = (base) + (eip); \
1157 type _x; \
1158 if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) ) \
1159 goto fail; \
1160 if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \
1161 { \
1162 propagate_page_fault(_ptr + sizeof(_x) - _rc, 0); \
1163 return EXCRET_fault_fixed; \
1164 } \
1165 (eip) += sizeof(_x); _x; })
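/*
 * insn_fetch() bounds-checks each fetch against the code segment limit
 * (jumping to the emulator's fail path if it would overrun) and, on a failed
 * guest-memory copy, bounces a page fault back to the guest and aborts
 * emulation with EXCRET_fault_fixed.
 */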
1167 #if defined(CONFIG_X86_32)
1168 # define read_sreg(regs, sr) ((regs)->sr)
1169 #elif defined(CONFIG_X86_64)
1170 # define read_sreg(regs, sr) read_segment_register(sr)
1171 #endif
1173 static int emulate_privileged_op(struct cpu_user_regs *regs)
1175 struct vcpu *v = current;
1176 unsigned long *reg, eip = regs->eip, res;
1177 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
1178 enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
1179 unsigned int port, i, data_sel, ar, data, rc;
1180 unsigned int op_bytes, op_default, ad_bytes, ad_default;
1181 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
1182 ? regs->reg \
1183 : ad_bytes == 4 \
1184 ? (u32)regs->reg \
1185 : (u16)regs->reg)
1186 #define wr_ad(reg, val) (ad_bytes >= sizeof(regs->reg) \
1187 ? regs->reg = (val) \
1188 : ad_bytes == 4 \
1189 ? (*(u32 *)&regs->reg = (val)) \
1190 : (*(u16 *)&regs->reg = (val)))
1191 unsigned long code_base, code_limit;
1192 char io_emul_stub[16];
1193 void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
1194 u32 l, h;
1196 if ( !read_descriptor(regs->cs, v, regs,
1197 &code_base, &code_limit, &ar,
1198 _SEGMENT_CODE|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
1199 goto fail;
1200 op_default = op_bytes = (ar & (_SEGMENT_L|_SEGMENT_DB)) ? 4 : 2;
1201 ad_default = ad_bytes = (ar & _SEGMENT_L) ? 8 : op_default;
1202 if ( !(ar & _SEGMENT_S) ||
1203 !(ar & _SEGMENT_P) ||
1204 !(ar & _SEGMENT_CODE) )
1205 goto fail;
1207 /* emulating only opcodes not allowing SS to be default */
1208 data_sel = read_sreg(regs, ds);
1210 /* Legacy prefixes. */
1211 for ( i = 0; i < 8; i++, rex == opcode || (rex = 0) )
1213 switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
1215 case 0x66: /* operand-size override */
1216 op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
1217 continue;
1218 case 0x67: /* address-size override */
1219 ad_bytes = ad_default != 4 ? 4 : 2; /* switch to 2/4 bytes */
1220 continue;
1221 case 0x2e: /* CS override */
1222 data_sel = regs->cs;
1223 continue;
1224 case 0x3e: /* DS override */
1225 data_sel = read_sreg(regs, ds);
1226 continue;
1227 case 0x26: /* ES override */
1228 data_sel = read_sreg(regs, es);
1229 continue;
1230 case 0x64: /* FS override */
1231 data_sel = read_sreg(regs, fs);
1232 lm_ovr = lm_seg_fs;
1233 continue;
1234 case 0x65: /* GS override */
1235 data_sel = read_sreg(regs, gs);
1236 lm_ovr = lm_seg_gs;
1237 continue;
1238 case 0x36: /* SS override */
1239 data_sel = regs->ss;
1240 continue;
1241 case 0xf0: /* LOCK */
1242 lock = 1;
1243 continue;
1244 case 0xf2: /* REPNE/REPNZ */
1245 case 0xf3: /* REP/REPE/REPZ */
1246 rep_prefix = 1;
1247 continue;
1248 default:
1249 if ( (ar & _SEGMENT_L) && (opcode & 0xf0) == 0x40 )
1251 rex = opcode;
1252 continue;
1254 break;
1256 break;
1259 /* REX prefix. */
1260 if ( rex & 8 ) /* REX.W */
1261 op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */
1262 modrm_reg = (rex & 4) << 1; /* REX.R */
1263 /* REX.X does not need to be decoded. */
1264 modrm_rm = (rex & 1) << 3; /* REX.B */
1266 if ( opcode == 0x0f )
1267 goto twobyte_opcode;
1269 if ( lock )
1270 goto fail;
1272 /* Input/Output String instructions. */
1273 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
1275 unsigned long data_base, data_limit;
1277 if ( rep_prefix && (rd_ad(ecx) == 0) )
1278 goto done;
1280 if ( !(opcode & 2) )
1282 data_sel = read_sreg(regs, es);
1283 lm_ovr = lm_seg_none;
1286 if ( !(ar & _SEGMENT_L) )
1288 if ( !read_descriptor(data_sel, v, regs,
1289 &data_base, &data_limit, &ar,
1290 _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
1291 goto fail;
1292 if ( !(ar & _SEGMENT_S) ||
1293 !(ar & _SEGMENT_P) ||
1294 (opcode & 2 ?
1295 (ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR) :
1296 (ar & _SEGMENT_CODE) || !(ar & _SEGMENT_WR)) )
1297 goto fail;
1299 #ifdef CONFIG_X86_64
1300 else
1302 if ( lm_ovr == lm_seg_none || data_sel < 4 )
1304 switch ( lm_ovr )
1306 case lm_seg_none:
1307 data_base = 0UL;
1308 break;
1309 case lm_seg_fs:
1310 data_base = v->arch.guest_context.fs_base;
1311 break;
1312 case lm_seg_gs:
1313 if ( guest_kernel_mode(v, regs) )
1314 data_base = v->arch.guest_context.gs_base_kernel;
1315 else
1316 data_base = v->arch.guest_context.gs_base_user;
1317 break;
1320 else
1321 read_descriptor(data_sel, v, regs,
1322 &data_base, &data_limit, &ar,
1323 0);
1324 data_limit = ~0UL;
1325 ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
1327 #endif
1329 continue_io_string:
1330 switch ( opcode )
1332 case 0x6c: /* INSB */
1333 op_bytes = 1;
1334 case 0x6d: /* INSW/INSL */
1335 if ( data_limit < op_bytes - 1 ||
1336 rd_ad(edi) > data_limit - (op_bytes - 1) ||
1337 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
1338 goto fail;
1339 port = (u16)regs->edx;
1340 switch ( op_bytes )
1342 case 1:
1343 /* emulate PIT counter 2 */
1344 data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) :
1345 ((port == 0x42 || port == 0x43 || port == 0x61) ?
1346 pv_pit_handler(port, 0, 0) : ~0));
1347 break;
1348 case 2:
1349 data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0);
1350 break;
1351 case 4:
1352 data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0);
1353 break;
1355 if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 )
1357 propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
1358 PFEC_write_access);
1359 return EXCRET_fault_fixed;
1361 wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
1362 break;
1364 case 0x6e: /* OUTSB */
1365 op_bytes = 1;
1366 case 0x6f: /* OUTSW/OUTSL */
1367 if ( data_limit < op_bytes - 1 ||
1368 rd_ad(esi) > data_limit - (op_bytes - 1) ||
1369 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
1370 goto fail;
1371 rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
1372 if ( rc != 0 )
1374 propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
1375 return EXCRET_fault_fixed;
1377 port = (u16)regs->edx;
1378 switch ( op_bytes )
1380 case 1:
1381 if ( guest_outb_okay(port, v, regs) )
1382 outb((u8)data, port);
1383 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1384 pv_pit_handler(port, data, 1);
1385 break;
1386 case 2:
1387 if ( guest_outw_okay(port, v, regs) )
1388 outw((u16)data, port);
1389 break;
1390 case 4:
1391 if ( guest_outl_okay(port, v, regs) )
1392 outl((u32)data, port);
1393 break;
1395 wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
1396 break;
1399 if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) )
1401 if ( !hypercall_preempt_check() )
1402 goto continue_io_string;
1403 eip = regs->eip;
1406 goto done;
1409 /*
1410 * Very likely to be an I/O instruction (IN/OUT).
1411 * Build an on-stack stub to execute the instruction with full guest
1412 * GPR context. This is needed for some systems which (ab)use IN/OUT
1413 * to communicate with BIOS code in system-management mode.
1414 */
1415 /* call host_to_guest_gpr_switch */
1416 io_emul_stub[0] = 0xe8;
1417 *(s32 *)&io_emul_stub[1] =
1418 (char *)host_to_guest_gpr_switch - &io_emul_stub[5];
1419 /* data16 or nop */
1420 io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66;
1421 /* <io-access opcode> */
1422 io_emul_stub[6] = opcode;
1423 /* imm8 or nop */
1424 io_emul_stub[7] = 0x90;
1425 /* jmp guest_to_host_gpr_switch */
1426 io_emul_stub[8] = 0xe9;
1427 *(s32 *)&io_emul_stub[9] =
1428 (char *)guest_to_host_gpr_switch - &io_emul_stub[13];
1430 /* Handy function-typed pointer to the stub. */
1431 io_emul = (void *)io_emul_stub;
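/*
 * The stub assembled above is therefore:
 *   call host_to_guest_gpr_switch ; 0x66 or nop ; IN/OUT opcode ;
 *   imm8 or nop ; jmp guest_to_host_gpr_switch
 * i.e. the port access executes with the guest's own GPR values loaded,
 * which matters for BIOS/SMM code that passes arguments in registers.
 */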
1433 /* I/O Port and Interrupt Flag instructions. */
1434 switch ( opcode )
1436 case 0xe4: /* IN imm8,%al */
1437 op_bytes = 1;
1438 case 0xe5: /* IN imm8,%eax */
1439 port = insn_fetch(u8, code_base, eip, code_limit);
1440 io_emul_stub[7] = port; /* imm8 */
1441 exec_in:
1442 if ( !guest_io_okay(port, op_bytes, v, regs) )
1443 goto fail;
1444 switch ( op_bytes )
1446 case 1:
1447 if ( guest_inb_okay(port, v, regs) )
1448 io_emul(regs);
1449 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1451 regs->eax &= ~0xffUL;
1452 regs->eax |= pv_pit_handler(port, 0, 0);
1454 else
1455 regs->eax |= (u8)~0;
1456 break;
1457 case 2:
1458 if ( guest_inw_okay(port, v, regs) )
1459 io_emul(regs);
1460 else
1461 regs->eax |= (u16)~0;
1462 break;
1463 case 4:
1464 if ( guest_inl_okay(port, v, regs) )
1465 io_emul(regs);
1466 else
1467 regs->eax = (u32)~0;
1468 break;
1470 goto done;
1472 case 0xec: /* IN %dx,%al */
1473 op_bytes = 1;
1474 case 0xed: /* IN %dx,%eax */
1475 port = (u16)regs->edx;
1476 goto exec_in;
1478 case 0xe6: /* OUT %al,imm8 */
1479 op_bytes = 1;
1480 case 0xe7: /* OUT %eax,imm8 */
1481 port = insn_fetch(u8, code_base, eip, code_limit);
1482 io_emul_stub[7] = port; /* imm8 */
1483 exec_out:
1484 if ( !guest_io_okay(port, op_bytes, v, regs) )
1485 goto fail;
1486 switch ( op_bytes )
1488 case 1:
1489 if ( guest_outb_okay(port, v, regs) )
1490 io_emul(regs);
1491 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1492 pv_pit_handler(port, regs->eax, 1);
1493 break;
1494 case 2:
1495 if ( guest_outw_okay(port, v, regs) )
1496 io_emul(regs);
1497 break;
1498 case 4:
1499 if ( guest_outl_okay(port, v, regs) )
1500 io_emul(regs);
1501 break;
1503 goto done;
1505 case 0xee: /* OUT %al,%dx */
1506 op_bytes = 1;
1507 case 0xef: /* OUT %eax,%dx */
1508 port = (u16)regs->edx;
1509 goto exec_out;
1511 case 0xfa: /* CLI */
1512 case 0xfb: /* STI */
1513 if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
1514 goto fail;
1515 /*
1516 * This is just too dangerous to allow, in my opinion. Consider if the
1517 * caller then tries to reenable interrupts using POPF: we can't trap
1518 * that and we'll end up with hard-to-debug lockups. Fast & loose will
1519 * do for us. :-)
1520 */
1521 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
1522 goto done;
1525 /* No decode of this single-byte opcode. */
1526 goto fail;
1528 twobyte_opcode:
1529 /* Two-byte opcodes only emulated from guest kernel. */
1530 if ( !guest_kernel_mode(v, regs) )
1531 goto fail;
1533 /* Privileged (ring 0) instructions. */
1534 opcode = insn_fetch(u8, code_base, eip, code_limit);
1535 if ( lock && (opcode & ~3) != 0x20 )
1536 goto fail;
1537 switch ( opcode )
1539 case 0x06: /* CLTS */
1540 (void)do_fpu_taskswitch(0);
1541 break;
1543 case 0x09: /* WBINVD */
1544 /* Ignore the instruction if unprivileged. */
1545 if ( !cache_flush_permitted(v->domain) )
1546 /* Non-physdev domain attempted WBINVD; ignore for now since
1547 newer linux uses this in some start-of-day timing loops */
1549 else
1550 wbinvd();
1551 break;
1553 case 0x20: /* MOV CR?,<reg> */
1554 opcode = insn_fetch(u8, code_base, eip, code_limit);
1555 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1556 modrm_rm |= (opcode >> 0) & 7;
1557 reg = decode_register(modrm_rm, regs, 0);
1558 switch ( modrm_reg )
1560 case 0: /* Read CR0 */
1561 *reg = (read_cr0() & ~X86_CR0_TS) |
1562 v->arch.guest_context.ctrlreg[0];
1563 break;
1565 case 2: /* Read CR2 */
1566 *reg = v->arch.guest_context.ctrlreg[2];
1567 break;
1569 case 3: /* Read CR3 */
1570 if ( !IS_COMPAT(v->domain) )
1571 *reg = xen_pfn_to_cr3(mfn_to_gmfn(
1572 v->domain, pagetable_get_pfn(v->arch.guest_table)));
1573 #ifdef CONFIG_COMPAT
1574 else
1575 *reg = compat_pfn_to_cr3(mfn_to_gmfn(
1576 v->domain, l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)))));
1577 #endif
1578 break;
1580 case 4: /* Read CR4 */
1581 /*
1582 * Guests can read CR4 to see what features Xen has enabled. We
1583 * therefore lie about PGE & PSE as they are unavailable to guests.
1584 */
1585 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
1586 break;
1588 default:
1589 goto fail;
1591 break;
1593 case 0x21: /* MOV DR?,<reg> */
1594 opcode = insn_fetch(u8, code_base, eip, code_limit);
1595 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1596 modrm_rm |= (opcode >> 0) & 7;
1597 reg = decode_register(modrm_rm, regs, 0);
1598 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
1599 goto fail;
1600 *reg = res;
1601 break;
1603 case 0x22: /* MOV <reg>,CR? */
1604 opcode = insn_fetch(u8, code_base, eip, code_limit);
1605 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1606 modrm_rm |= (opcode >> 0) & 7;
1607 reg = decode_register(modrm_rm, regs, 0);
1608 switch ( modrm_reg )
1610 case 0: /* Write CR0 */
1611 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
1613 gdprintk(XENLOG_WARNING,
1614 "Attempt to change unmodifiable CR0 flags.\n");
1615 goto fail;
1617 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
1618 break;
1620 case 2: /* Write CR2 */
1621 v->arch.guest_context.ctrlreg[2] = *reg;
1622 arch_set_cr2(v, *reg);
1623 break;
1625 case 3: /* Write CR3 */
1626 LOCK_BIGLOCK(v->domain);
1627 if ( !IS_COMPAT(v->domain) )
1628 rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
1629 #ifdef CONFIG_COMPAT
1630 else
1631 rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
1632 #endif
1633 UNLOCK_BIGLOCK(v->domain);
1634 if ( rc == 0 ) /* not okay */
1635 goto fail;
1636 break;
1638 case 4:
1639 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
1641 gdprintk(XENLOG_WARNING, "Attempt to change CR4 flags.\n");
1642 goto fail;
1644 break;
1646 default:
1647 goto fail;
1649 break;
1651 case 0x23: /* MOV <reg>,DR? */
1652 opcode = insn_fetch(u8, code_base, eip, code_limit);
1653 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1654 modrm_rm |= (opcode >> 0) & 7;
1655 reg = decode_register(modrm_rm, regs, 0);
1656 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
1657 goto fail;
1658 break;
1660 case 0x30: /* WRMSR */
1661 switch ( regs->ecx )
1663 #ifdef CONFIG_X86_64
1664 case MSR_FS_BASE:
1665 if ( IS_COMPAT(v->domain) )
1666 goto fail;
1667 if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
1668 goto fail;
1669 v->arch.guest_context.fs_base =
1670 ((u64)regs->edx << 32) | regs->eax;
1671 break;
1672 case MSR_GS_BASE:
1673 if ( IS_COMPAT(v->domain) )
1674 goto fail;
1675 if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
1676 goto fail;
1677 v->arch.guest_context.gs_base_kernel =
1678 ((u64)regs->edx << 32) | regs->eax;
1679 break;
1680 case MSR_SHADOW_GS_BASE:
1681 if ( IS_COMPAT(v->domain) )
1682 goto fail;
1683 if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
1684 goto fail;
1685 v->arch.guest_context.gs_base_user =
1686 ((u64)regs->edx << 32) | regs->eax;
1687 break;
1688 #endif
1689 default:
1690 if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) )
1691 break;
1693 if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
1694 (regs->eax != l) || (regs->edx != h) )
1695 gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
1696 "%08x:%08x to %08lx:%08lx.\n",
1697 _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
1698 break;
1700 break;
1702 case 0x32: /* RDMSR */
1703 switch ( regs->ecx )
1705 #ifdef CONFIG_X86_64
1706 case MSR_FS_BASE:
1707 if ( IS_COMPAT(v->domain) )
1708 goto fail;
1709 regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
1710 regs->edx = v->arch.guest_context.fs_base >> 32;
1711 break;
1712 case MSR_GS_BASE:
1713 if ( IS_COMPAT(v->domain) )
1714 goto fail;
1715 regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
1716 regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
1717 break;
1718 case MSR_SHADOW_GS_BASE:
1719 if ( IS_COMPAT(v->domain) )
1720 goto fail;
1721 regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
1722 regs->edx = v->arch.guest_context.gs_base_user >> 32;
1723 break;
1724 #endif
1725 case MSR_EFER:
1726 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1727 goto fail;
1728 break;
1729 default:
1730 if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
1732 regs->eax = l;
1733 regs->edx = h;
1734 break;
1736 /* Everyone can read the MSR space. */
1737 /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
1738 _p(regs->ecx));*/
1739 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1740 goto fail;
1741 break;
1743 break;
1745 default:
1746 goto fail;
1749 #undef wr_ad
1750 #undef rd_ad
1752 done:
1753 regs->eip = eip;
1754 return EXCRET_fault_fixed;
1756 fail:
1757 return 0;
1760 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
1762 struct vcpu *v = current;
1763 unsigned long fixup;
1765 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
1767 if ( regs->error_code & 1 )
1768 goto hardware_gp;
1770 if ( !guest_mode(regs) )
1771 goto gp_in_kernel;
1773 /*
1774 * Cunning trick to allow arbitrary "INT n" handling.
1776 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
1777 * instruction from trapping to the appropriate vector, when that might not
1778 * be expected by Xen or the guest OS. For example, that entry might be for
1779 * a fault handler (unlike traps, faults don't increment EIP), or might
1780 * expect an error code on the stack (which a software trap never
1781 * provides), or might be a hardware interrupt handler that doesn't like
1782 * being called spuriously.
1784 * Instead, a GPF occurs with the faulting IDT vector in the error code.
1785 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
1786 * clear to indicate that it's a software fault, not hardware.
1788 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
1789 * okay because they can only be triggered by an explicit DPL-checked
1790 * instruction. The DPL specified by the guest OS for these vectors is NOT
1791 * CHECKED!!
1792 */
1793 if ( (regs->error_code & 3) == 2 )
1795 /* This fault must be due to <INT n> instruction. */
1796 const struct trap_info *ti;
1797 unsigned char vector = regs->error_code >> 3;
1798 ti = &v->arch.guest_context.trap_ctxt[vector];
1799 if ( permit_softint(TI_GET_DPL(ti), v, regs) )
1801 regs->eip += 2;
1802 return do_guest_trap(vector, regs, 0);
1806 /* Emulate some simple privileged and I/O instructions. */
1807 if ( (regs->error_code == 0) &&
1808 emulate_privileged_op(regs) )
1809 return 0;
1811 #if defined(__i386__)
1812 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
1813 (regs->error_code == 0) &&
1814 gpf_emulate_4gb(regs) )
1815 return 0;
1816 #endif
1818 /* Pass on GPF as is. */
1819 return do_guest_trap(TRAP_gp_fault, regs, 1);
1821 gp_in_kernel:
1823 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
1825 dprintk(XENLOG_INFO, "GPF (%04x): %p -> %p\n",
1826 regs->error_code, _p(regs->eip), _p(fixup));
1827 regs->eip = fixup;
1828 return 0;
1831 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
1833 hardware_gp:
1834 show_execution_state(regs);
1835 panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
1836 return 0;
1839 static void nmi_softirq(void)
1841 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
1842 vcpu_kick(dom0->vcpu[0]);
1845 static void nmi_dom0_report(unsigned int reason_idx)
1847 struct domain *d;
1848 struct vcpu *v;
1850 if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
1851 return;
1853 set_bit(reason_idx, nmi_reason(d));
1855 if ( !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
1856 raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
1859 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
1861 switch ( opt_nmi[0] )
1863 case 'd': /* 'dom0' */
1864 nmi_dom0_report(_XEN_NMIREASON_parity_error);
1865 case 'i': /* 'ignore' */
1866 break;
1867 default: /* 'fatal' */
1868 console_force_unlock();
1869 printk("\n\nNMI - MEMORY ERROR\n");
1870 fatal_trap(TRAP_nmi, regs);
1873 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
1874 mdelay(1);
1875 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
1878 asmlinkage void io_check_error(struct cpu_user_regs *regs)
1880 switch ( opt_nmi[0] )
1882 case 'd': /* 'dom0' */
1883 nmi_dom0_report(_XEN_NMIREASON_io_error);
1884 case 'i': /* 'ignore' */
1885 break;
1886 default: /* 'fatal' */
1887 console_force_unlock();
1888 printk("\n\nNMI - I/O ERROR\n");
1889 fatal_trap(TRAP_nmi, regs);
1892 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
1893 mdelay(1);
1894 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
1897 static void unknown_nmi_error(unsigned char reason)
1899 switch ( opt_nmi[0] )
1901 case 'd': /* 'dom0' */
1902 nmi_dom0_report(_XEN_NMIREASON_unknown);
1903 case 'i': /* 'ignore' */
1904 break;
1905 default: /* 'fatal' */
1906 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
1907 printk("Dazed and confused, but trying to continue\n");
1908 printk("Do you have a strange power saving mode enabled?\n");
1909 kexec_crash();
1913 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
1915 return 0;
1918 static nmi_callback_t nmi_callback = dummy_nmi_callback;
1920 asmlinkage void do_nmi(struct cpu_user_regs *regs)
1922 unsigned int cpu = smp_processor_id();
1923 unsigned char reason;
1925 ++nmi_count(cpu);
1927 if ( nmi_callback(regs, cpu) )
1928 return;
1930 if ( nmi_watchdog )
1931 nmi_watchdog_tick(regs);
1933 /* Only the BSP gets external NMIs from the system. */
1934 if ( cpu == 0 )
1936 reason = inb(0x61);
1937 if ( reason & 0x80 )
1938 mem_parity_error(regs);
1939 else if ( reason & 0x40 )
1940 io_check_error(regs);
1941 else if ( !nmi_watchdog )
1942 unknown_nmi_error((unsigned char)(reason&0xff));
1946 void set_nmi_callback(nmi_callback_t callback)
1948 nmi_callback = callback;
1951 void unset_nmi_callback(void)
1953 nmi_callback = dummy_nmi_callback;
1956 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
1958 BUG_ON(!guest_mode(regs));
1960 setup_fpu(current);
1962 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
1964 do_guest_trap(TRAP_no_device, regs, 0);
1965 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1968 return EXCRET_fault_fixed;
1971 asmlinkage int do_debug(struct cpu_user_regs *regs)
1973 unsigned long condition;
1974 struct vcpu *v = current;
1976 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
1978 /* Mask out spurious debug traps due to lazy DR7 setting */
1979 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
1980 (v->arch.guest_context.debugreg[7] == 0) )
1982 __asm__("mov %0,%%db7" : : "r" (0UL));
1983 goto out;
1986 DEBUGGER_trap_entry(TRAP_debug, regs);
1988 if ( !guest_mode(regs) )
1990 /* Clear TF just for absolute sanity. */
1991 regs->eflags &= ~EF_TF;
1992 /*
1993 * We ignore watchpoints when they trigger within Xen. This may happen
1994 * when a buffer is passed to us which previously had a watchpoint set
1995 * on it. No need to bump EIP; the only faulting trap is an instruction
1996 * breakpoint, which can't happen to us.
1997 */
1998 goto out;
2001 /* Save debug status register where guest OS can peek at it */
2002 v->arch.guest_context.debugreg[6] = condition;
2004 return do_guest_trap(TRAP_debug, regs, 0);
2006 out:
2007 return EXCRET_not_a_fault;
2010 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
2012 return EXCRET_not_a_fault;
2015 void set_intr_gate(unsigned int n, void *addr)
2017 #ifdef __i386__
2018 int i;
2019 /* Keep secondary tables in sync with IRQ updates. */
2020 for ( i = 1; i < NR_CPUS; i++ )
2021 if ( idt_tables[i] != NULL )
2022 _set_gate(&idt_tables[i][n], 14, 0, addr);
2023 #endif
2024 _set_gate(&idt_table[n], 14, 0, addr);
2025 }
2027 void set_system_gate(unsigned int n, void *addr)
2028 {
2029 _set_gate(idt_table+n,14,3,addr);
2030 }
2032 void set_task_gate(unsigned int n, unsigned int sel)
2033 {
2034 idt_table[n].a = sel << 16;
2035 idt_table[n].b = 0x8500;
2036 }
2038 void set_tss_desc(unsigned int n, void *addr)
2040 _set_tssldt_desc(
2041 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
2042 (unsigned long)addr,
2043 offsetof(struct tss_struct, __cacheline_filler) - 1,
2044 9);
2045 #ifdef CONFIG_COMPAT
2046 _set_tssldt_desc(
2047 compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
2048 (unsigned long)addr,
2049 offsetof(struct tss_struct, __cacheline_filler) - 1,
2050 11);
2051 #endif
2054 void __init trap_init(void)
2056 extern void percpu_traps_init(void);
2058 /*
2059 * Note that interrupt gates are always used, rather than trap gates. We
2060 * must have interrupts disabled until DS/ES/FS/GS are saved because the
2061 * first activation must have the "bad" value(s) for these registers and
2062 * we may lose them if another activation is installed before they are
2063 * saved. The page-fault handler also needs interrupts disabled until %cr2
2064 * has been read and saved on the stack.
2065 */
2066 set_intr_gate(TRAP_divide_error,&divide_error);
2067 set_intr_gate(TRAP_debug,&debug);
2068 set_intr_gate(TRAP_nmi,&nmi);
2069 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
2070 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
2071 set_intr_gate(TRAP_bounds,&bounds);
2072 set_intr_gate(TRAP_invalid_op,&invalid_op);
2073 set_intr_gate(TRAP_no_device,&device_not_available);
2074 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
2075 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
2076 set_intr_gate(TRAP_no_segment,&segment_not_present);
2077 set_intr_gate(TRAP_stack_error,&stack_segment);
2078 set_intr_gate(TRAP_gp_fault,&general_protection);
2079 set_intr_gate(TRAP_page_fault,&page_fault);
2080 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
2081 set_intr_gate(TRAP_copro_error,&coprocessor_error);
2082 set_intr_gate(TRAP_alignment_check,&alignment_check);
2083 set_intr_gate(TRAP_machine_check,&machine_check);
2084 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
2086 percpu_traps_init();
2088 cpu_init();
2090 open_softirq(NMI_SOFTIRQ, nmi_softirq);
2094 long do_set_trap_table(XEN_GUEST_HANDLE(trap_info_t) traps)
2096 struct trap_info cur;
2097 struct trap_info *dst = current->arch.guest_context.trap_ctxt;
2098 long rc = 0;
2100 /* If no table is presented then clear the entire virtual IDT. */
2101 if ( guest_handle_is_null(traps) )
2103 memset(dst, 0, 256 * sizeof(*dst));
2104 init_int80_direct_trap(current);
2105 return 0;
2108 for ( ; ; )
2110 if ( hypercall_preempt_check() )
2112 rc = hypercall_create_continuation(
2113 __HYPERVISOR_set_trap_table, "h", traps);
2114 break;
2117 if ( copy_from_guest(&cur, traps, 1) )
2119 rc = -EFAULT;
2120 break;
2123 if ( cur.address == 0 )
2124 break;
2126 fixup_guest_code_selector(current->domain, cur.cs);
2128 memcpy(&dst[cur.vector], &cur, sizeof(cur));
2130 if ( cur.vector == 0x80 )
2131 init_int80_direct_trap(current);
2133 guest_handle_add_offset(traps, 1);
2136 return rc;
2140 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
2142 int i;
2144 switch ( reg )
2146 case 0:
2147 if ( !access_ok(value, sizeof(long)) )
2148 return -EPERM;
2149 if ( p == current )
2150 __asm__ ( "mov %0, %%db0" : : "r" (value) );
2151 break;
2152 case 1:
2153 if ( !access_ok(value, sizeof(long)) )
2154 return -EPERM;
2155 if ( p == current )
2156 __asm__ ( "mov %0, %%db1" : : "r" (value) );
2157 break;
2158 case 2:
2159 if ( !access_ok(value, sizeof(long)) )
2160 return -EPERM;
2161 if ( p == current )
2162 __asm__ ( "mov %0, %%db2" : : "r" (value) );
2163 break;
2164 case 3:
2165 if ( !access_ok(value, sizeof(long)) )
2166 return -EPERM;
2167 if ( p == current )
2168 __asm__ ( "mov %0, %%db3" : : "r" (value) );
2169 break;
2170 case 6:
2171 /*
2172 * DR6: Bits 4-11,16-31 reserved (set to 1).
2173 * Bit 12 reserved (set to 0).
2174 */
2175 value &= 0xffffefff; /* reserved bits => 0 */
2176 value |= 0xffff0ff0; /* reserved bits => 1 */
2177 if ( p == current )
2178 __asm__ ( "mov %0, %%db6" : : "r" (value) );
2179 break;
2180 case 7:
2181 /*
2182 * DR7: Bit 10 reserved (set to 1).
2183 * Bits 11-12,14-15 reserved (set to 0).
2184 * Privileged bits:
2185 * GD (bit 13): must be 0.
2186 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
2187 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
2188 */
2189 /* DR7 == 0 => debugging disabled for this domain. */
2190 if ( value != 0 )
2192 value &= 0xffff27ff; /* reserved bits => 0 */
2193 value |= 0x00000400; /* reserved bits => 1 */
2194 if ( (value & (1<<13)) != 0 ) return -EPERM;
2195 for ( i = 0; i < 16; i += 2 )
2196 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
2198 if ( p == current )
2199 __asm__ ( "mov %0, %%db7" : : "r" (value) );
2200 break;
2201 default:
2202 return -EINVAL;
2205 p->arch.guest_context.debugreg[reg] = value;
2206 return 0;
2207 }
2209 long do_set_debugreg(int reg, unsigned long value)
2210 {
2211 return set_debugreg(current, reg, value);
2212 }
2214 unsigned long do_get_debugreg(int reg)
2215 {
2216 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
2217 return current->arch.guest_context.debugreg[reg];
2218 }
2220 /*
2221 * Local variables:
2222 * mode: C
2223 * c-set-style: "BSD"
2224 * c-basic-offset: 4
2225 * tab-width: 4
2226 * indent-tabs-mode: nil
2227 * End:
2228 */