ia64/xen-unstable

changeset 15438:3cf5052ba5e5

x86: machine check exception handling

Properly handle MCE (connecting the existing, but so far unused,
vendor-specific handlers). HVM guests no longer own CR4.MCE (and hence
can no longer suppress the exception), preventing silent machine
shutdown.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Jun 21 15:10:04 2007 +0100 (2007-06-21)
parents 899a44cb6ef6
children 04d4b7b6f5b7
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/traps.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/traps.c xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/svm/vmcb.h xen/include/asm-x86/hvm/trace.h xen/include/asm-x86/hvm/vmx/vmx.h xen/include/asm-x86/processor.h xen/include/public/trace.h
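
The crux of the change, before the diff: CR4.MCE becomes a host-owned bit, so whatever the guest writes to CR4 is kept in a per-vcpu shadow while the value the CPU actually runs with always has MCE set. Below is a minimal C sketch of that scheme, built around the HVM_CR4_HOST_MASK definition added to xen/include/asm-x86/hvm/hvm.h in this patch; the vcpu structure and the standalone main() are simplifications for illustration, not Xen code.

    #include <stdio.h>

    #define X86_CR4_PAE   0x0020UL
    #define X86_CR4_MCE   0x0040UL
    #define X86_CR4_VMXE  0x2000UL

    /* Stand-in for the CR4 features the host itself runs with. */
    static unsigned long mmu_cr4_features = X86_CR4_PAE | X86_CR4_MCE;

    /* As in the hvm.h hunk: CR4 bits owned by the host. */
    #define HVM_CR4_HOST_MASK (mmu_cr4_features & \
        (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))

    struct vcpu_cr4 {
        unsigned long shadow;   /* value the guest reads back */
        unsigned long hw;       /* value the CPU actually runs with */
    };

    /* Guest CR4 write: unsupported bits fault, host-owned bits stay set. */
    static int guest_wrcr4(struct vcpu_cr4 *cr4, unsigned long value)
    {
        if (value & ~mmu_cr4_features)
            return -1;                       /* patch injects #GP here */
        cr4->shadow = value;                 /* guest sees what it wrote */
        cr4->hw = value | HVM_CR4_HOST_MASK; /* ...but MCE remains set */
        return 0;
    }

    int main(void)
    {
        struct vcpu_cr4 cr4;
        guest_wrcr4(&cr4, 0);   /* guest attempts to clear CR4.MCE */
        printf("shadow=%#lx hw=%#lx\n", cr4.shadow, cr4.hw);
        return 0;               /* hw still has MCE: #MC can't be masked */
    }

Note how the SVM nested-paging paths below OR in (HVM_CR4_HOST_MASK & ~X86_CR4_PAE) instead: with hardware paging the guest genuinely controls CR4.PAE, so only the remaining host bits (MCE in particular) are forced on.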
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Jun 21 14:03:57 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Jun 21 15:10:04 2007 +0100
     1.3 @@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
     1.4      }
     1.5  
     1.6   skip_cr3:
     1.7 -    vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
     1.8 +    vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
     1.9      v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
    1.10      
    1.11      vmcb->idtr.limit = c->idtr_limit;
    1.12 @@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
    1.13      /* update VMCB for nested paging restore */
    1.14      if ( paging_mode_hap(v->domain) ) {
    1.15          vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
    1.16 -        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
    1.17 +        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
    1.18 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
    1.19          vmcb->cr3 = c->cr3;
    1.20          vmcb->np_enable = 1;
    1.21          vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
    1.22 @@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct 
    1.23          : : "a" (__pa(root_vmcb[cpu])) );
    1.24  
    1.25  #ifdef __x86_64__
    1.26 -    /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
    1.27 -    idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
    1.28 +    /* Resume use of ISTs now that the host TR is reinstated. */
    1.29 +    idt_tables[cpu][TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
    1.30 +    idt_tables[cpu][TRAP_nmi].a           |= 2UL << 32; /* IST2 */
    1.31 +    idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
    1.32  #endif
    1.33  }
    1.34  
    1.35 @@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
    1.36      set_segment_register(ss, 0);
    1.37  
    1.38      /*
    1.39 -     * Cannot use IST2 for NMIs while we are running with the guest TR. But
    1.40 -     * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
    1.41 +     * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
    1.42 +     * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
    1.43       */
    1.44 -    idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
    1.45 +    idt_tables[cpu][TRAP_double_fault].a  &= ~(3UL << 32);
    1.46 +    idt_tables[cpu][TRAP_nmi].a           &= ~(3UL << 32);
    1.47 +    idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
    1.48  #endif
    1.49  
    1.50      svm_restore_dr(v);
    1.51 @@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr, 
    1.52          break;
    1.53  
    1.54      case 4: /* CR4 */
    1.55 +        if ( value & ~mmu_cr4_features )
    1.56 +        {
    1.57 +            HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to enable unsupported "
    1.58 +                        "CR4 features %lx (host %lx)",
    1.59 +                        value, mmu_cr4_features);
    1.60 +            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
    1.61 +            break;
    1.62 +        }
    1.63 +
    1.64          if ( paging_mode_hap(v->domain) )
    1.65          {
    1.66 -            vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
    1.67 +            v->arch.hvm_svm.cpu_shadow_cr4 = value;
    1.68 +            vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
    1.69              paging_update_paging_modes(v);
    1.70              /* signal paging update to ASID handler */
    1.71              svm_asid_g_update_paging (v);
    1.72 @@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr, 
    1.73          }
    1.74  
    1.75          v->arch.hvm_svm.cpu_shadow_cr4 = value;
    1.76 -        vmcb->cr4 = value | SVM_CR4_HOST_MASK;
    1.77 +        vmcb->cr4 = value | HVM_CR4_HOST_MASK;
    1.78    
    1.79          /*
    1.80           * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
    1.81 @@ -2265,12 +2280,13 @@ static int svm_reset_to_realmode(struct 
    1.82      vmcb->cr2 = 0;
    1.83      vmcb->efer = EFER_SVME;
    1.84  
    1.85 -    vmcb->cr4 = SVM_CR4_HOST_MASK;
    1.86 +    vmcb->cr4 = HVM_CR4_HOST_MASK;
    1.87      v->arch.hvm_svm.cpu_shadow_cr4 = 0;
    1.88  
    1.89      if ( paging_mode_hap(v->domain) ) {
    1.90          vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
    1.91 -        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
    1.92 +        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
    1.93 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
    1.94      }
    1.95  
    1.96      /* This will jump to ROMBIOS */
    1.97 @@ -2411,6 +2427,12 @@ asmlinkage void svm_vmexit_handler(struc
    1.98          break;
    1.99      }
   1.100  
   1.101 +    case VMEXIT_EXCEPTION_MC:
   1.102 +        HVMTRACE_0D(MCE, v);
   1.103 +        svm_store_cpu_guest_regs(v, regs, NULL);
   1.104 +        do_machine_check(regs);
   1.105 +        break;
   1.106 +
   1.107      case VMEXIT_VINTR:
   1.108          vmcb->vintr.fields.irq = 0;
   1.109          vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
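
A note on the IST juggling in svm_ctxt_switch_{from,to}() above: on x86-64, bits 32-34 of the low quadword of an IDT gate hold the Interrupt Stack Table index (0 means no stack switch). While the guest's TR is loaded, an IST-based switch would land on a guest-controlled stack, so switching to the guest clears the fields for #DF/NMI/#MC and switching back restores IST1/IST2/IST3. A sketch of the encoding follows, with a deliberately simplified descriptor; the real idt_entry layout and the patch's 2-bit clear mask differ in detail.

    #include <stdint.h>

    /* Low 8 bytes of a 16-byte x86-64 interrupt gate (simplified). */
    struct idt_entry { uint64_t a, b; };

    #define IST_SHIFT 32                 /* IST index: bits 32-34 of 'a' */
    #define IST_MASK  (7UL << IST_SHIFT)

    /* ist = 1..7 selects tss->ist[ist-1]; ist = 0 disables the switch. */
    static inline void set_ist(struct idt_entry *e, uint64_t ist)
    {
        e->a = (e->a & ~IST_MASK) | (ist << IST_SHIFT);
    }

    int main(void)
    {
        struct idt_entry e = { 0, 0 };
        set_ist(&e, 3);     /* e.g. route #MC to IST3, as the setup does */
        set_ist(&e, 0);     /* disable while the guest TR is loaded */
        return (int)e.a;    /* back to 0 */
    }

The patch clears with ~(3UL << 32), covering only the two low bits of the field; that is sufficient here because the indices in use never exceed 3.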
     2.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Thu Jun 21 14:03:57 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Thu Jun 21 15:10:04 2007 +0100
     2.3 @@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
     2.4      /* Guest CR4. */
     2.5      arch_svm->cpu_shadow_cr4 =
     2.6          read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
     2.7 -    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
     2.8 +    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
     2.9  
    2.10      paging_update_paging_modes(v);
    2.11      vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
    2.12 @@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
    2.13          vmcb->np_enable = 1; /* enable nested paging */
    2.14          vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
    2.15          vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
    2.16 -        vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
    2.17 +        vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
    2.18 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
    2.19 +        vmcb->exception_intercepts = HVM_TRAP_MASK;
    2.20  
    2.21          /* No point in intercepting CR0/3/4 reads, because the hardware 
    2.22           * will return the guest versions anyway. */
    2.23 @@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
    2.24      }
    2.25      else
    2.26      {
    2.27 -        vmcb->exception_intercepts = 1U << TRAP_page_fault;
    2.28 +        vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
    2.29      }
    2.30  
    2.31      return 0;
     3.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Thu Jun 21 14:03:57 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Thu Jun 21 15:10:04 2007 +0100
     3.3 @@ -421,7 +421,7 @@ static void construct_vmcs(struct vcpu *
     3.4      __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
     3.5  #endif
     3.6  
     3.7 -    __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
     3.8 +    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
     3.9  
    3.10      /* Guest CR0. */
    3.11      cr0 = read_cr0();
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 21 14:03:57 2007 +0100
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 21 15:10:04 2007 +0100
     4.3 @@ -615,7 +615,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
     4.4      }
     4.5  #endif
     4.6  
     4.7 -    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
     4.8 +    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     4.9      v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
    4.10      __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
    4.11  
    4.12 @@ -2001,7 +2001,7 @@ static int vmx_world_restore(struct vcpu
    4.13      else
    4.14          HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
    4.15  
    4.16 -    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
    4.17 +    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
    4.18      v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
    4.19      __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
    4.20  
    4.21 @@ -2400,6 +2400,14 @@ static int mov_to_cr(int gp, int cr, str
    4.22      case 4: /* CR4 */
    4.23          old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
    4.24  
    4.25 +        if ( value & ~mmu_cr4_features )
    4.26 +        {
    4.27 +            HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to enable unsupported "
    4.28 +                        "CR4 features %lx (host %lx)",
    4.29 +                        value, mmu_cr4_features);
    4.30 +            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
    4.31 +            break;
    4.32 +        }
    4.33          if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
    4.34          {
    4.35              if ( vmx_pgbit_test(v) )
    4.36 @@ -2440,7 +2448,7 @@ static int mov_to_cr(int gp, int cr, str
    4.37              }
    4.38          }
    4.39  
    4.40 -        __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
    4.41 +        __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
    4.42          v->arch.hvm_vmx.cpu_shadow_cr4 = value;
    4.43          __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
    4.44  
    4.45 @@ -2826,7 +2834,8 @@ static void vmx_reflect_exception(struct
    4.46      }
    4.47  }
    4.48  
    4.49 -static void vmx_failed_vmentry(unsigned int exit_reason)
    4.50 +static void vmx_failed_vmentry(unsigned int exit_reason,
    4.51 +                               struct cpu_user_regs *regs)
    4.52  {
    4.53      unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
    4.54      unsigned long exit_qualification;
    4.55 @@ -2843,6 +2852,9 @@ static void vmx_failed_vmentry(unsigned 
    4.56          break;
    4.57      case EXIT_REASON_MACHINE_CHECK:
    4.58          printk("caused by machine check.\n");
    4.59 +        HVMTRACE_0D(MCE, current);
    4.60 +        vmx_store_cpu_guest_regs(current, regs, NULL);
    4.61 +        do_machine_check(regs);
    4.62          break;
    4.63      default:
    4.64          printk("reason not known yet!");
    4.65 @@ -2872,7 +2884,7 @@ asmlinkage void vmx_vmexit_handler(struc
    4.66          local_irq_enable();
    4.67  
    4.68      if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
    4.69 -        return vmx_failed_vmentry(exit_reason);
    4.70 +        return vmx_failed_vmentry(exit_reason, regs);
    4.71  
    4.72      switch ( exit_reason )
    4.73      {
    4.74 @@ -2923,12 +2935,20 @@ asmlinkage void vmx_vmexit_handler(struc
    4.75              vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
    4.76              break;
    4.77          case TRAP_nmi:
    4.78 -            HVMTRACE_0D(NMI, v);
    4.79              if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
    4.80 +            {
    4.81 +                HVMTRACE_0D(NMI, v);
    4.82 +                vmx_store_cpu_guest_regs(v, regs, NULL);
    4.83                  do_nmi(regs); /* Real NMI, vector 2: normal processing. */
    4.84 +            }
    4.85              else
    4.86                  vmx_reflect_exception(v);
    4.87              break;
    4.88 +        case TRAP_machine_check:
    4.89 +            HVMTRACE_0D(MCE, v);
    4.90 +            vmx_store_cpu_guest_regs(v, regs, NULL);
    4.91 +            do_machine_check(regs);
    4.92 +            break;
    4.93          default:
    4.94              goto exit_and_crash;
    4.95          }
     5.1 --- a/xen/arch/x86/traps.c	Thu Jun 21 14:03:57 2007 +0100
     5.2 +++ b/xen/arch/x86/traps.c	Thu Jun 21 15:10:04 2007 +0100
     5.3 @@ -86,6 +86,7 @@ asmlinkage void _name(void);            
     5.4  asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
     5.5  
     5.6  asmlinkage void nmi(void);
     5.7 +asmlinkage void machine_check(void);
     5.8  DECLARE_TRAP_HANDLER(divide_error);
     5.9  DECLARE_TRAP_HANDLER(debug);
    5.10  DECLARE_TRAP_HANDLER(int3);
    5.11 @@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(coprocessor_error);
    5.12  DECLARE_TRAP_HANDLER(simd_coprocessor_error);
    5.13  DECLARE_TRAP_HANDLER(alignment_check);
    5.14  DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
    5.15 -DECLARE_TRAP_HANDLER(machine_check);
    5.16  
    5.17  long do_set_debugreg(int reg, unsigned long value);
    5.18  unsigned long do_get_debugreg(int reg);
    5.19 @@ -731,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
    5.20      return do_guest_trap(TRAP_int3, regs, 0);
    5.21  }
    5.22  
    5.23 -asmlinkage int do_machine_check(struct cpu_user_regs *regs)
    5.24 +asmlinkage void do_machine_check(struct cpu_user_regs *regs)
    5.25  {
    5.26 -    fatal_trap(TRAP_machine_check, regs);
    5.27 -    return 0;
    5.28 +    extern fastcall void (*machine_check_vector)(
    5.29 +        struct cpu_user_regs *, long error_code);
    5.30 +    machine_check_vector(regs, regs->error_code);
    5.31  }
    5.32  
    5.33  void propagate_page_fault(unsigned long addr, u16 error_code)
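
do_machine_check() now indirects through machine_check_vector, the pointer via which mcheck_init() (see the processor.h hunk below) installs a vendor-specific #MC handler at boot. The following sketch shows that dispatch pattern with illustrative names only; the actual handlers live in xen/arch/x86/cpu/mcheck and are not part of this diff.

    struct cpu_user_regs;   /* Xen's saved register frame */

    typedef void (*mce_handler_t)(struct cpu_user_regs *regs, long error_code);

    /* Default until a vendor handler is installed: an unexpected #MC. */
    static void unexpected_machine_check(struct cpu_user_regs *regs, long err)
    {
        /* the real default logs the event and treats the fault as fatal */
    }

    /* The pointer do_machine_check() calls through. */
    mce_handler_t machine_check_vector = unexpected_machine_check;

    /* Vendor setup (Intel, AMD, ...) swaps in its own handler: */
    void install_mce_handler(mce_handler_t handler)   /* hypothetical helper */
    {
        machine_check_vector = handler;
    }

    int main(void)
    {
        machine_check_vector(0, 0);  /* default handler; a no-op here */
        return 0;
    }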
     6.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Jun 21 14:03:57 2007 +0100
     6.2 +++ b/xen/arch/x86/x86_32/entry.S	Thu Jun 21 15:10:04 2007 +0100
     6.3 @@ -534,10 +534,6 @@ ENTRY(page_fault)
     6.4          movw  $TRAP_page_fault,2(%esp)
     6.5          jmp   handle_exception
     6.6  
     6.7 -ENTRY(machine_check)
     6.8 -        pushl $TRAP_machine_check<<16
     6.9 -        jmp   handle_exception
    6.10 -
    6.11  ENTRY(spurious_interrupt_bug)
    6.12          pushl $TRAP_spurious_int<<16
    6.13          jmp   handle_exception
    6.14 @@ -550,18 +546,20 @@ 1:      movl  %esp,%eax
    6.15          addl  $4,%esp
    6.16          jmp   restore_all_xen
    6.17  
    6.18 -ENTRY(nmi)
    6.19 +handle_nmi_mce:
    6.20  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    6.21 -        # NMI entry protocol is incompatible with guest kernel in ring 0.
    6.22 +        # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
    6.23 +        addl  $4,%esp
    6.24          iret
    6.25  #else
    6.26          # Save state but do not trash the segment registers!
    6.27 -        pushl $TRAP_nmi<<16
    6.28 -        SAVE_ALL(.Lnmi_xen,.Lnmi_common)
    6.29 -.Lnmi_common:
    6.30 -        movl  %esp,%eax
    6.31 -        pushl %eax
    6.32 -        call  do_nmi
    6.33 +        SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
    6.34 +.Lnmi_mce_common:
    6.35 +        xorl  %eax,%eax
    6.36 +        movw  UREGS_entry_vector(%esp),%ax
    6.37 +        movl  %esp,%edx
    6.38 +        pushl %edx
    6.39 +        call  *exception_table(,%eax,4)
    6.40          addl  $4,%esp
    6.41          /* 
    6.42           * NB. We may return to Xen context with polluted %ds/%es. But in such
    6.43 @@ -569,13 +567,13 @@ ENTRY(nmi)
    6.44           * be detected by SAVE_ALL(), or we have rolled back restore_guest.
    6.45           */
    6.46          jmp   ret_from_intr
    6.47 -.Lnmi_xen:
    6.48 +.Lnmi_mce_xen:
    6.49          /* Check the outer (guest) context for %ds/%es state validity. */
    6.50          GET_GUEST_REGS(%ebx)
    6.51          testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
    6.52          mov   %ds,%eax
    6.53          mov   %es,%edx
    6.54 -        jnz   .Lnmi_vm86
    6.55 +        jnz   .Lnmi_mce_vm86
    6.56          /* We may have interrupted Xen while messing with %ds/%es... */
    6.57          cmpw  %ax,%cx
    6.58          mov   %ecx,%ds             /* Ensure %ds is valid */
    6.59 @@ -587,19 +585,27 @@ ENTRY(nmi)
    6.60          movl  $.Lrestore_sregs_guest,%ecx
    6.61          movl  %edx,UREGS_es(%ebx)  /* Ensure guest frame contains guest ES */
    6.62          cmpl  %ecx,UREGS_eip(%esp)
    6.63 -        jbe   .Lnmi_common
    6.64 +        jbe   .Lnmi_mce_common
    6.65          cmpl  $.Lrestore_iret_guest,UREGS_eip(%esp)
    6.66 -        ja    .Lnmi_common
    6.67 +        ja    .Lnmi_mce_common
    6.68          /* Roll outer context restore_guest back to restoring %ds/%es. */
    6.69          movl  %ecx,UREGS_eip(%esp)
    6.70 -        jmp   .Lnmi_common
    6.71 -.Lnmi_vm86:
    6.72 +        jmp   .Lnmi_mce_common
    6.73 +.Lnmi_mce_vm86:
    6.74          /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
    6.75          mov   %ecx,%ds
    6.76          mov   %ecx,%es
    6.77 -        jmp   .Lnmi_common
    6.78 +        jmp   .Lnmi_mce_common
    6.79  #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
    6.80  
    6.81 +ENTRY(nmi)
    6.82 +        pushl $TRAP_nmi<<16
    6.83 +        jmp   handle_nmi_mce
    6.84 +
    6.85 +ENTRY(machine_check)
    6.86 +        pushl $TRAP_machine_check<<16
    6.87 +        jmp   handle_nmi_mce
    6.88 +
    6.89  ENTRY(setup_vm86_frame)
    6.90          mov %ecx,%ds
    6.91          mov %ecx,%es
    6.92 @@ -620,7 +626,7 @@ ENTRY(setup_vm86_frame)
    6.93  ENTRY(exception_table)
    6.94          .long do_divide_error
    6.95          .long do_debug
    6.96 -        .long 0 # nmi
    6.97 +        .long do_nmi
    6.98          .long do_int3
    6.99          .long do_overflow
   6.100          .long do_bounds
     7.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Jun 21 14:03:57 2007 +0100
     7.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Jun 21 15:10:04 2007 +0100
     7.3 @@ -505,11 +505,6 @@ ENTRY(page_fault)
     7.4          movl  $TRAP_page_fault,4(%rsp)
     7.5          jmp   handle_exception
     7.6  
     7.7 -ENTRY(machine_check)
     7.8 -        pushq $0
     7.9 -        movl  $TRAP_machine_check,4(%rsp)
    7.10 -        jmp   handle_exception
    7.11 -
    7.12  ENTRY(spurious_interrupt_bug)
    7.13          pushq $0
    7.14          movl  $TRAP_spurious_int,4(%rsp)
    7.15 @@ -527,31 +522,38 @@ ENTRY(early_page_fault)
    7.16          call  do_early_page_fault
    7.17          jmp   restore_all_xen
    7.18  
    7.19 -ENTRY(nmi)
    7.20 -        pushq $0
    7.21 +handle_ist_exception:
    7.22          SAVE_ALL
    7.23          testb $3,UREGS_cs(%rsp)
    7.24 -        jz    nmi_in_hypervisor_mode
    7.25 +        jz    1f
    7.26          /* Interrupted guest context. Copy the context to stack bottom. */
    7.27 -        GET_GUEST_REGS(%rbx)
    7.28 +        GET_GUEST_REGS(%rdi)
    7.29 +        movq  %rsp,%rsi
    7.30          movl  $UREGS_kernel_sizeof/8,%ecx
    7.31 -1:      popq  %rax
    7.32 -        movq  %rax,(%rbx)
    7.33 -        addq  $8,%rbx
    7.34 -        loop  1b
    7.35 -        subq  $UREGS_kernel_sizeof,%rbx
    7.36 -        movq  %rbx,%rsp
    7.37 -nmi_in_hypervisor_mode:
    7.38 -        movq  %rsp,%rdi
    7.39 -        call  do_nmi
    7.40 +        movq  %rdi,%rsp
    7.41 +        rep   movsq
    7.42 +1:      movq  %rsp,%rdi
    7.43 +        movl  UREGS_entry_vector(%rsp),%eax
    7.44 +        leaq  exception_table(%rip),%rdx
    7.45 +        callq *(%rdx,%rax,8)
    7.46          jmp   ret_from_intr
    7.47  
    7.48 +ENTRY(nmi)
    7.49 +        pushq $0
    7.50 +        movl  $TRAP_nmi,4(%rsp)
    7.51 +        jmp   handle_ist_exception
    7.52 +
    7.53 +ENTRY(machine_check)
    7.54 +        pushq $0
    7.55 +        movl  $TRAP_machine_check,4(%rsp)
    7.56 +        jmp   handle_ist_exception
    7.57 +
    7.58  .data
    7.59  
    7.60  ENTRY(exception_table)
    7.61          .quad do_divide_error
    7.62          .quad do_debug
    7.63 -        .quad 0 # nmi
    7.64 +        .quad do_nmi
    7.65          .quad do_int3
    7.66          .quad do_overflow
    7.67          .quad do_bounds
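
The new handle_ist_exception path above replaces the NMI-only entry code: after SAVE_ALL it copies a guest-context frame from the IST stack down to the normal per-CPU stack bottom (so the rest of Xen sees the frame where it always is), then dispatches through exception_table using the saved entry vector, whose NMI and #MC slots are now populated. A runnable C sketch of just the table dispatch follows; the frame relocation and the real register layout are elided, and only the architectural vector numbers 2 and 18 are taken from the patch.

    #include <stdio.h>

    struct regs { unsigned int entry_vector; /* ... rest of the frame */ };

    typedef void (*exc_handler_t)(struct regs *);

    static void do_nmi(struct regs *r)           { puts("NMI"); }
    static void do_machine_check(struct regs *r) { puts("#MC"); }

    /* vector 2 = NMI, vector 18 = #MC; other slots elided */
    static exc_handler_t exception_table[32] = {
        [2] = do_nmi, [18] = do_machine_check,
    };

    static void handle_ist_exception(struct regs *r)
    {
        exception_table[r->entry_vector](r);
    }

    int main(void)
    {
        struct regs r = { .entry_vector = 18 };
        handle_ist_exception(&r);   /* prints "#MC" */
        return 0;
    }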
     8.1 --- a/xen/arch/x86/x86_64/traps.c	Thu Jun 21 14:03:57 2007 +0100
     8.2 +++ b/xen/arch/x86/x86_64/traps.c	Thu Jun 21 15:10:04 2007 +0100
     8.3 @@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
     8.4      {
     8.5          /* Specify dedicated interrupt stacks for NMIs and double faults. */
     8.6          set_intr_gate(TRAP_double_fault, &double_fault);
     8.7 -        idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
     8.8 -        idt_table[TRAP_nmi].a          |= 2UL << 32; /* IST2 */
     8.9 +        idt_table[TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
    8.10 +        idt_table[TRAP_nmi].a           |= 2UL << 32; /* IST2 */
    8.11 +        idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
    8.12  
    8.13          /*
    8.14           * The 32-on-64 hypercall entry vector is only accessible from ring 1.
    8.15 @@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
    8.16      stack_bottom = (char *)get_stack_bottom();
    8.17      stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
    8.18  
    8.19 -    /* Double-fault handler has its own per-CPU 2kB stack. */
    8.20 +    /* Machine Check handler has its own per-CPU 1kB stack. */
    8.21 +    init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
    8.22 +
    8.23 +    /* Double-fault handler has its own per-CPU 1kB stack. */
    8.24      init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
    8.25  
    8.26      /* NMI handler has its own per-CPU 1kB stack. */
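
The hunk above shrinks the double-fault stack from 2kB to 1kB and gives the freed bottom kilobyte to the #MC handler. The resulting per-CPU carve-out, as a comment-only sketch: stacks grow downward, so each ist[] slot holds the high end of its region; the NMI assignment is inferred from the trailing context line and sits just past this hunk.

    /*
     *   stack[   0 .. 1023]  #MC stack  IST3: init_tss[cpu].ist[2] = &stack[1024]
     *   stack[1024 .. 2047]  #DF stack  IST1: init_tss[cpu].ist[0] = &stack[2048]
     *   stack[2048 .. 3071]  NMI stack  IST2: ist[1], presumably &stack[3072]
     *   stack[3072 .. ... ]  remainder of the per-CPU stack area
     */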
     9.1 --- a/xen/include/asm-x86/hvm/hvm.h	Thu Jun 21 14:03:57 2007 +0100
     9.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Thu Jun 21 15:10:04 2007 +0100
     9.3 @@ -302,4 +302,11 @@ static inline int hvm_event_injection_fa
     9.4      return hvm_funcs.event_injection_faulted(v);
     9.5  }
     9.6  
     9.7 +/* These bits in the CR4 are owned by the host */
     9.8 +#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
     9.9 +    (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
    9.10 +
    9.11 +/* These exceptions must always be intercepted. */
    9.12 +#define HVM_TRAP_MASK (1U << TRAP_machine_check)
    9.13 +
    9.14  #endif /* __ASM_X86_HVM_HVM_H__ */
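
HVM_TRAP_MASK is OR-ed into the exception intercepts on both VMX (EXCEPTION_BITMAP) and SVM (exception_intercepts), so a guest #MC always exits to Xen; shadow-paging configurations additionally intercept #PF. Restated as a tiny self-contained check, with the architectural vector values:

    #include <stdio.h>

    #define TRAP_page_fault     14
    #define TRAP_machine_check  18

    /* As in the hunk above: exceptions that must always be intercepted. */
    #define HVM_TRAP_MASK (1U << TRAP_machine_check)

    int main(void)
    {
        unsigned int hap    = HVM_TRAP_MASK;                           /* nested paging */
        unsigned int shadow = HVM_TRAP_MASK | (1U << TRAP_page_fault); /* shadow paging */
        printf("hap=%#x shadow=%#x\n", hap, shadow);
        return 0;
    }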
    10.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Thu Jun 21 14:03:57 2007 +0100
    10.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Thu Jun 21 15:10:04 2007 +0100
    10.3 @@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);
    10.4  
    10.5  void setup_vmcb_dump(void);
    10.6  
    10.7 -/* These bits in the CR4 are owned by the host */
    10.8 -#if CONFIG_PAGING_LEVELS >= 3
    10.9 -#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
   10.10 -#else
   10.11 -#define SVM_CR4_HOST_MASK 0
   10.12 -#endif
   10.13 -
   10.14 -
   10.15  #endif /* ASM_X86_HVM_SVM_VMCS_H__ */
   10.16  
   10.17  /*
    11.1 --- a/xen/include/asm-x86/hvm/trace.h	Thu Jun 21 14:03:57 2007 +0100
    11.2 +++ b/xen/include/asm-x86/hvm/trace.h	Thu Jun 21 15:10:04 2007 +0100
    11.3 @@ -21,6 +21,7 @@
    11.4  #define DO_TRC_HVM_CPUID       1
    11.5  #define DO_TRC_HVM_INTR        1
    11.6  #define DO_TRC_HVM_NMI         1
    11.7 +#define DO_TRC_HVM_MCE         1
    11.8  #define DO_TRC_HVM_SMI         1
    11.9  #define DO_TRC_HVM_VMMCALL     1
   11.10  #define DO_TRC_HVM_HLT         1
    12.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Thu Jun 21 14:03:57 2007 +0100
    12.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Thu Jun 21 15:10:04 2007 +0100
    12.3 @@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu 
    12.4  #define X86_SEG_AR_GRANULARITY  (1u << 15) /* 15, granularity */
    12.5  #define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
    12.6  
    12.7 -/* These bits in the CR4 are owned by the host */
    12.8 -#if CONFIG_PAGING_LEVELS >= 3
    12.9 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
   12.10 -#else
   12.11 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
   12.12 -#endif
   12.13 -
   12.14  #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
   12.15  #define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
   12.16  #define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
    13.1 --- a/xen/include/asm-x86/processor.h	Thu Jun 21 14:03:57 2007 +0100
    13.2 +++ b/xen/include/asm-x86/processor.h	Thu Jun 21 15:10:04 2007 +0100
    13.3 @@ -566,7 +566,8 @@ void compat_show_guest_stack(struct cpu_
    13.4  extern void mtrr_ap_init(void);
    13.5  extern void mtrr_bp_init(void);
    13.6  
    13.7 -extern void mcheck_init(struct cpuinfo_x86 *c);
    13.8 +void mcheck_init(struct cpuinfo_x86 *c);
    13.9 +asmlinkage void do_machine_check(struct cpu_user_regs *regs);
   13.10  
   13.11  int cpuid_hypervisor_leaves(
   13.12      uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
    14.1 --- a/xen/include/public/trace.h	Thu Jun 21 14:03:57 2007 +0100
    14.2 +++ b/xen/include/public/trace.h	Thu Jun 21 15:10:04 2007 +0100
    14.3 @@ -88,6 +88,7 @@
    14.4  #define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
    14.5  #define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
    14.6  #define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
    14.7 +#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
    14.8  
    14.9  /* This structure represents a single trace buffer record. */
   14.10  struct t_rec {