ia64/xen-unstable

changeset 3290:b9ab4345fd1b

bitkeeper revision 1.1159.1.483 (41c0c417XYObowWqbfqU0cdLx30C9w)

Initial Intel VMX changes to support unmodified Linux guests on Intel's VT platform.
author iap10@labyrinth.cl.cam.ac.uk
date Wed Dec 15 23:09:11 2004 +0000 (2004-12-15)
parents 86e3ae8351cd
children 37cb59b9ddfd
files .rootkeys docs/misc/VMX_changes.txt xen/arch/x86/domain.c xen/arch/x86/io_apic.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/time.c xen/arch/x86/vmx.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/entry.S xen/common/event_channel.c xen/common/kernel.c xen/common/softirq.c xen/include/asm-x86/config.h xen/include/asm-x86/cpufeature.h xen/include/asm-x86/e820.h xen/include/asm-x86/mm.h xen/include/asm-x86/msr.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/vmx.h xen/include/asm-x86/vmx_cpu.h xen/include/asm-x86/vmx_platform.h xen/include/asm-x86/vmx_vmcs.h xen/include/public/arch-x86_32.h xen/include/public/io/ioreq.h xen/include/xen/sched.h xen/include/xen/types.h
line diff
     1.1 --- a/.rootkeys	Wed Dec 15 18:19:36 2004 +0000
     1.2 +++ b/.rootkeys	Wed Dec 15 23:09:11 2004 +0000
     1.3 @@ -15,6 +15,7 @@ 3f9e7d53iC47UnlfORp9iC1vai6kWw docs/Make
     1.4  4187c1c7IWmBinGdI19kL4MuZ6RLbQ docs/check_pkgs
     1.5  3f9e7d60PWZJeVh5xdnk0nLUdxlqEA docs/figs/xenlogo.eps
     1.6  418a3248xjIqmNKo0v_XQSfAvlBGFw docs/html.sty
     1.7 +41c0c4116itF389v0CEWcmzue6zJkA docs/misc/VMX_changes.txt
     1.8  4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO
     1.9  412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
    1.10  40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
    1.11 @@ -698,6 +699,9 @@ 3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/
    1.12  3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/x86/time.c
    1.13  3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S
    1.14  3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c
    1.15 +41c0c411tD3C7TpfDMiFTf7BaNd_Dg xen/arch/x86/vmx.c
    1.16 +41c0c411ODt8uEmV-yUxpQLpqimE5Q xen/arch/x86/vmx_io.c
    1.17 +41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
    1.18  419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
    1.19  3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
    1.20  3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
    1.21 @@ -808,6 +812,7 @@ 3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/inclu
    1.22  3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-x86/desc.h
    1.23  40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-x86/div64.h
    1.24  3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/domain_page.h
    1.25 +41c0c412Ufq5sAvri3dMHC1BXiO6Gw xen/include/asm-x86/e820.h
    1.26  3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-x86/fixmap.h
    1.27  3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-x86/flushtlb.h
    1.28  3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-x86/hardirq.h
    1.29 @@ -837,6 +842,10 @@ 40e1966akOHWvvunCED7x3HPv35QvQ xen/inclu
    1.30  3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-x86/system.h
    1.31  3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-x86/types.h
    1.32  40cf1596saFaHD5DC5zvrSn7CDCWGQ xen/include/asm-x86/uaccess.h
    1.33 +41c0c412k6GHYF3cJtDdw37ee3TVaw xen/include/asm-x86/vmx.h
    1.34 +41c0c412hck3QX-6_MaXaISGkngQuA xen/include/asm-x86/vmx_cpu.h
    1.35 +41c0c41243jC1mcArZx_t3YkBL4lTA xen/include/asm-x86/vmx_platform.h
    1.36 +41c0c412lQ0NVVN9PsOSznQ-qhOiPA xen/include/asm-x86/vmx_vmcs.h
    1.37  418fbcfe_WliJPToeVM-9VStvym-hw xen/include/asm-x86/x86_32/asm_defns.h
    1.38  3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-x86/x86_32/current.h
    1.39  3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/x86_32/regs.h
    1.40 @@ -857,6 +866,7 @@ 403cd194j2pyLqXD8FJ-ukvZzkPenw xen/inclu
    1.41  4121d149udGfSUGhn3k1ECz0bM31nQ xen/include/public/grant_table.h
    1.42  40f5623bqoi4GEoBiiUc6TZk1HjsMg xen/include/public/io/blkif.h
    1.43  40dc4076pVeE1kEEWzcUaNZin65kCA xen/include/public/io/domain_controller.h
    1.44 +41c0c412FLc0gunlJl91qMYscFtXVA xen/include/public/io/ioreq.h
    1.45  40f5623cTZ80EwjWUBlh44A9F9i_Lg xen/include/public/io/netif.h
    1.46  4051db79512nOCGweabrFWO2M2h5ng xen/include/public/physdev.h
    1.47  40589968wmhPmV5-ENbBYmMjnedgKw xen/include/public/sched_ctl.h
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/docs/misc/VMX_changes.txt	Wed Dec 15 23:09:11 2004 +0000
     2.3 @@ -0,0 +1,90 @@
     2.4 +Changes to Xen in support of Intel(R) Vanderpool Technology
     2.5 +-------------------------------------------------------------
     2.6 +
     2.7 +Our VT extensions to the Xen hypervisor provide full platform
     2.8 +virtualization, including CPU(s), memory, and I/O infrastructure. The
     2.9 +generic code in Xen handles and schedules those virtual machines as it
    2.10 +does for the existing para-virtualized domains.
    2.11 +
     2.12 +Full virtualization of unmodified guest OSes requires full device
    2.13 +virtualization as well. The device models in BOCHS
    2.14 +(http://bochs.sourceforge.net/) were decoupled from the CPU
    2.15 +virtualization, and are used to virtualize the legacy devices (such as
    2.16 +keyboard, mouse, VGA, IDE) in the PC platform. At this point, the
    2.17 +device models run in user mode on domain 0, not in the Xen hypervisor.
    2.18 +
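    In outline, the flow between the hypervisor and the device model,
    using the names that appear in vmx_io_instruction() later in this
    changeset (a sketch of the pattern, not the complete sequence):

        /* Guest executes a trapped I/O instruction; Xen fills in an
         * I/O request on the page shared with the device model, then: */
        p->state = STATE_IOREQ_READY;   /* publish the request            */
        evtchn_send(IOPACKET_PORT);     /* notify the model in domain 0   */
        do_block();                     /* block the guest until complete */
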
    2.19 +We would like to thank Ian Pratt and Keir Fraser for reviewing our
    2.20 +design and code intensively, and for providing numerous useful
    2.21 +suggestions to improve the architecture and code. 
    2.22 +
     2.23 +The following Intel team members deserve credit for making this
     2.24 +release happen: Yunhong Jiang, Nitin Kamble, Chengyuan Li, Xin Li,
    2.25 +Xiaofeng Ling, Benjamin Liu, Asit Mallick, Jun Nakajima, Sunil Saxena,
    2.26 +Arun Sharma, Edwin Zhai, Jeff Zheng, and Louis Zhuang. We'll continue
    2.27 +to add more features to complete full virtualization in Xen using VT.
    2.28 +
     2.29 +These notes document the changes to the Xen hypervisor needed to add
     2.30 +VT support. The changes to other areas, such as the control panel,
     2.31 +will be documented as we deliver the code.
    2.32 +
    2.33 +Summary of changes for the first release
    2.34 +----------------------------------------
    2.35 +December 15, 2004
    2.36 +
     2.37 +    * VT-specific event handling and domain management were added.
    2.38 +
     2.39 +    * Shadow mode was extended to support full 32-bit guests.
    2.40 +    
     2.41 +    * Domain switching code was extended to support VT domains.
    2.42 +    
     2.43 +    * I/O request handling was added to communicate with the device model.
    2.44 +
     2.45 +    * The domain builder was extended to provide the environment the
     2.46 +      guest expects when it enters protected mode, including E820 memory
     2.47 +      and VGA info, typically obtained via BIOS calls (see the sketch below).
    2.48 +
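    As a sketch of the last point, a builder-provided memory map entry
    would follow the classic BIOS E820 layout below. The struct and field
    names here are illustrative assumptions; the definitions actually
    used are in the new xen/include/asm-x86/e820.h, which this excerpt
    does not show.

        /* Illustrative BIOS-style E820 memory map entry. */
        struct e820entry_sketch {
            u64 addr;   /* start of the memory region        */
            u64 size;   /* length of the region in bytes     */
            u32 type;   /* 1 = usable RAM, 2 = reserved, ... */
        };
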
    2.49 +New code:
    2.50 +---------
    2.51 +    VT (Vanderpool Technology) is based on the new VMX (Virtual
    2.52 +    Machine Extensions) architecture. The current release of the
     2.53 +    software supports 32-bit guests only.
    2.54 +
    2.55 +    * arch/x86/vmx.[ch] and arch/x86/vmx_*.[ch]: created to handle
     2.56 +      VMX-specific events in order to provide the virtual machine abstraction.
    2.57 +
     2.58 +    * arch/x86/x86_32/entry.S: a new code path was added to provide the
     2.59 +      first-level handler for VM exits. The first-level handler calls
     2.60 +      the second-level handler in arch/x86/vmx.c (see the sketch below).
    2.61 +
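    A control-flow sketch of the two-level split; the label and step
    names are illustrative, not taken from entry.S:

        /* VM exit -> first-level handler (assembly, entry.S):
         *   1. save the guest general registers into a struct xen_regs
         *   2. call vmx_vmexit_handler (second level, arch/x86/vmx.c)
         *   3. restore registers and resume the guest
         */
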
     2.62 +    * arch/x86/setup.c: a new function start_vmx() was added to
     2.63 +      init_intel() to enable VMX mode.
    2.64 +
    2.65 +    * include/asm-x86/config.h: #ifdef CONFIG_VMX was added.
    2.66 +
     2.67 +    * arch/x86/domain.c: a new code path was added to create a VMX
     2.68 +      domain, given the flag from the control panel.
    2.69 +
     2.70 +    * include/public/io/ioreq.h: a new data structure was added to
     2.71 +      define the I/O requests exchanged between the Xen hypervisor and
     2.72 +      the device models (sketched below).
    2.73 +
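    A minimal sketch of that structure, reconstructed from the fields
    that vmx.c fills in (dir, pdata_valid, count, size, u.data/u.pdata,
    addr, port_mm, state); the authoritative layout and field types are
    in include/public/io/ioreq.h, which this excerpt does not show:

        /* Hedged reconstruction of the I/O request record. */
        typedef struct {
            unsigned long addr;     /* port number or physical address    */
            union {
                unsigned long data; /* immediate value (non-string I/O)   */
                void *pdata;        /* guest-physical buffer (string I/O) */
            } u;
            unsigned long count;    /* repeat count, e.g. from REP        */
            unsigned long size;     /* bytes per transfer: 1, 2 or 4      */
            int dir;                /* IOREQ_READ or IOREQ_WRITE          */
            int pdata_valid;        /* u.pdata, not u.data, is valid      */
            int port_mm;            /* 0 = port I/O, 1 = memory-mapped    */
            int state;              /* e.g. STATE_IOREQ_READY             */
        } ioreq_sketch_t;
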
    2.74 +Changes to the existing code:
    2.75 +-----------------------------
    2.76 +
     2.77 +    * arch/x86/shadow.[ch]: a new mode, SHM_full_32, was added to support
     2.78 +      full virtualization. The current Xen code assumes that the guest
     2.79 +      page directory and tables hold _machine_ (or host) physical page
     2.80 +      frame numbers; the new code adds support for _guest_ physical
     2.81 +      page frame numbers (see the translation sketch below).
    2.82 +
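    For illustration, the translation this mode introduces, following
    the pattern visible in shadow.c and vmx.c. The helper name is
    hypothetical; phys_to_machine_mapping[] is the real table:

        /* Hypothetical helper: map a guest-physical frame number to the
         * machine frame backing it, as SHM_full_32 must do before
         * installing a shadow PTE. */
        static inline unsigned long guest_pfn_to_machine_pfn(unsigned long gpfn)
        {
            return phys_to_machine_mapping[gpfn];
        }

        /* A shadow PTE then pairs the machine frame with the guest's
         * low flag bits, cf. vmx_shadow_invlpg() in this changeset:
         *   spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
         */
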
    2.83 +    * include/asm-x86/processor.h: struct arch_vmx_struct arch_vmx has
    2.84 +      been added to the thread_struct data structure. The arch_vmx has
     2.85 +      the additional VMX-related CPU context.
    2.86 +
     2.87 +    * arch/x86/io_apic.c: a reverse mapping between vector and irq has
    2.88 +      been added. We will revisit this code when considering MSI
    2.89 +      support.
    2.90 +
    2.91 +--- Jun
    2.92 +
    2.93 +
     3.1 --- a/xen/arch/x86/domain.c	Wed Dec 15 18:19:36 2004 +0000
     3.2 +++ b/xen/arch/x86/domain.c	Wed Dec 15 23:09:11 2004 +0000
     3.3 @@ -32,6 +32,10 @@
     3.4  #include <asm/shadow.h>
     3.5  #include <xen/console.h>
     3.6  #include <xen/elf.h>
     3.7 +#include <asm/vmx.h>
     3.8 +#include <asm/vmx_vmcs.h>
     3.9 +#include <xen/kernel.h>
    3.10 +#include <public/io/ioreq.h>
    3.11  #include <xen/multicall.h>
    3.12  
    3.13  #if !defined(CONFIG_X86_64BITMODE)
    3.14 @@ -158,6 +162,9 @@ void machine_restart(char * __unused)
    3.15      smp_send_stop();
    3.16      disable_IO_APIC();
    3.17  #endif
    3.18 +#ifdef CONFIG_VMX
    3.19 +    stop_vmx();
    3.20 +#endif
    3.21  
    3.22      if(!reboot_thru_bios) {
    3.23          /* rebooting needs to touch the page at absolute addr 0 */
    3.24 @@ -239,6 +246,97 @@ void arch_do_createdomain(struct exec_do
    3.25      }
    3.26  }
    3.27  
    3.28 +#ifdef CONFIG_VMX
    3.29 +void arch_vmx_do_resume(struct exec_domain *d) 
    3.30 +{
    3.31 +    vmx_do_resume(d);
    3.32 +    reset_stack_and_jump(vmx_asm_do_resume);
    3.33 +}
    3.34 +
    3.35 +void arch_vmx_do_launch(struct exec_domain *d) 
    3.36 +{
    3.37 +    vmx_do_launch(d);
    3.38 +    reset_stack_and_jump(vmx_asm_do_launch);
    3.39 +}
    3.40 +
    3.41 +static void monitor_mk_pagetable(struct exec_domain *ed)
    3.42 +{
    3.43 +    unsigned long mpfn;
    3.44 +    l2_pgentry_t *mpl2e;
    3.45 +    struct pfn_info *mpfn_info;
    3.46 +    struct mm_struct *m = &ed->mm;
    3.47 +    struct domain *d = ed->domain;
    3.48 +
    3.49 +    mpfn_info = alloc_domheap_page(NULL);
    3.50 +    ASSERT( mpfn_info ); 
    3.51 +
    3.52 +    mpfn = (unsigned long) (mpfn_info - frame_table);
    3.53 +    mpl2e = (l2_pgentry_t *) map_domain_mem(mpfn << PAGE_SHIFT);
    3.54 +    memset(mpl2e, 0, PAGE_SIZE);
    3.55 +
    3.56 +    memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
    3.57 +           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
    3.58 +           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    3.59 +
    3.60 +    m->monitor_table = mk_pagetable(mpfn << PAGE_SHIFT);
    3.61 +    m->shadow_mode = SHM_full_32;
    3.62 +
    3.63 +    mpl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
    3.64 +        mk_l2_pgentry((__pa(d->mm_perdomain_pt) & PAGE_MASK) 
    3.65 +                      | __PAGE_HYPERVISOR);
    3.66 +
    3.67 +    unmap_domain_mem(mpl2e);
    3.68 +}
    3.69 +
    3.70 +static int vmx_final_setup_guestos(struct exec_domain *d,
    3.71 +                                   full_execution_context_t *full_context)
    3.72 +{
    3.73 +    int error;
    3.74 +    execution_context_t *context;
    3.75 +    struct vmcs_struct *vmcs;
    3.76 +    unsigned long guest_pa;
    3.77 +
    3.78 +    context = &full_context->cpu_ctxt;
    3.79 +
    3.80 +    /*
    3.81 +     * Create a new VMCS
    3.82 +     */
    3.83 +    if (!(vmcs = alloc_vmcs())) {
    3.84 +        printk("Failed to create a new VMCS\n");
    3.85 +        return -ENOMEM;
    3.86 +    }
    3.87 +
    3.88 +    memset(&d->thread.arch_vmx, 0, sizeof (struct arch_vmx_struct));
    3.89 +
    3.90 +    d->thread.arch_vmx.vmcs = vmcs;
    3.91 +    error = construct_vmcs(&d->thread.arch_vmx, context, full_context, VMCS_USE_HOST_ENV);
    3.92 +    if (error < 0) {
    3.93 +        printk("Failed to construct a new VMCS\n");
    3.94 +        goto out;
    3.95 +    }
    3.96 +
    3.97 +    monitor_mk_pagetable(d);
    3.98 +    guest_pa = pagetable_val(d->mm.pagetable);
    3.99 +    clear_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
   3.100 +
   3.101 +    d->thread.arch_vmx.vmx_platform.real_mode_data = 
   3.102 +        (unsigned long *) context->esi;
   3.103 +
   3.104 +    memset(&d->domain->shared_info->evtchn_mask[0], 0xff, 
   3.105 +           sizeof(d->domain->shared_info->evtchn_mask));
   3.106 +    clear_bit(IOPACKET_PORT, &d->domain->shared_info->evtchn_mask[0]);
   3.107 +
   3.108 +    d->thread.schedule_tail = arch_vmx_do_launch;
   3.109 +
   3.110 +    return 0;
   3.111 +
   3.112 +out:
   3.113 +    free_vmcs(vmcs);
   3.114 +    d->thread.arch_vmx.vmcs = 0;
   3.115 +    return error;
   3.116 +}
   3.117 +#endif
   3.118 +
   3.119  int arch_final_setup_guestos(struct exec_domain *d, full_execution_context_t *c)
   3.120  {
   3.121      unsigned long phys_basetab;
   3.122 @@ -310,6 +408,11 @@ int arch_final_setup_guestos(struct exec
   3.123          }
   3.124      }
   3.125  
   3.126 +#ifdef CONFIG_VMX
   3.127 +    if (c->flags & ECF_VMX_GUEST)
   3.128 +        return vmx_final_setup_guestos(d, c);
   3.129 +#endif
   3.130 +
   3.131      return 0;
   3.132  }
   3.133  
   3.134 @@ -356,7 +459,8 @@ void switch_to(struct exec_domain *prev_
   3.135      struct tss_struct *tss = init_tss + smp_processor_id();
   3.136      execution_context_t *stack_ec = get_execution_context();
   3.137      int i;
   3.138 -    
   3.139 +    unsigned long vmx_domain = next_p->thread.arch_vmx.flags; 
   3.140 +
   3.141      __cli();
   3.142  
   3.143      /* Switch guest general-register state. */
   3.144 @@ -375,12 +479,6 @@ void switch_to(struct exec_domain *prev_
   3.145                 &next_p->thread.user_ctxt,
   3.146                 sizeof(*stack_ec));
   3.147  
   3.148 -        SET_FAST_TRAP(&next_p->thread);
   3.149 -
   3.150 -        /* Switch the guest OS ring-1 stack. */
   3.151 -        tss->esp1 = next->guestos_sp;
   3.152 -        tss->ss1  = next->guestos_ss;
   3.153 -
   3.154          /* Maybe switch the debug registers. */
   3.155          if ( unlikely(next->debugreg[7]) )
   3.156          {
   3.157 @@ -393,6 +491,24 @@ void switch_to(struct exec_domain *prev_
   3.158              loaddebug(next, 7);
   3.159          }
   3.160  
   3.161 +         if (vmx_domain) {
   3.162 +            /* Switch page tables. */
   3.163 +            write_ptbase(&next_p->mm);
   3.164 + 
   3.165 +            set_current(next_p);
   3.166 +            /* Switch GDT and LDT. */
   3.167 +            __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
   3.168 +
   3.169 +            __sti();
   3.170 +            return;
   3.171 +         }
   3.172 + 
   3.173 +        SET_FAST_TRAP(&next_p->thread);
   3.174 +
   3.175 +        /* Switch the guest OS ring-1 stack. */
   3.176 +        tss->esp1 = next->guestos_sp;
   3.177 +        tss->ss1  = next->guestos_ss;
   3.178 +
   3.179          /* Switch page tables. */
   3.180          write_ptbase(&next_p->mm);
   3.181      }
     4.1 --- a/xen/arch/x86/io_apic.c	Wed Dec 15 18:19:36 2004 +0000
     4.2 +++ b/xen/arch/x86/io_apic.c	Wed Dec 15 23:09:11 2004 +0000
     4.3 @@ -615,6 +615,10 @@ static inline int IO_APIC_irq_trigger(in
     4.4  
     4.5  int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
     4.6  
     4.7 +#ifdef CONFIG_VMX
     4.8 +int vector_irq[256];
     4.9 +#endif
    4.10 +
    4.11  static int __init assign_irq_vector(int irq)
    4.12  {
    4.13  	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
    4.14 @@ -637,6 +641,10 @@ next:
    4.15  		panic("ran out of interrupt sources!");
    4.16  
    4.17  	IO_APIC_VECTOR(irq) = current_vector;
    4.18 +#ifdef CONFIG_VMX
    4.19 +        vector_irq[current_vector] = irq;
    4.20 +        printk("vector_irq[%x] = %d\n", current_vector, irq);
    4.21 +#endif
    4.22  	return current_vector;
    4.23  }
    4.24  
     5.1 --- a/xen/arch/x86/setup.c	Wed Dec 15 18:19:36 2004 +0000
     5.2 +++ b/xen/arch/x86/setup.c	Wed Dec 15 23:09:11 2004 +0000
     5.3 @@ -146,6 +146,11 @@ static void __init init_intel(struct cpu
     5.4          }
     5.5      }
     5.6  #endif
     5.7 +
     5.8 +#ifdef CONFIG_VMX
     5.9 +    start_vmx();
    5.10 +#endif
    5.11 +
    5.12  }
    5.13  
    5.14  static void __init init_amd(struct cpuinfo_x86 *c)
     6.1 --- a/xen/arch/x86/shadow.c	Wed Dec 15 18:19:36 2004 +0000
     6.2 +++ b/xen/arch/x86/shadow.c	Wed Dec 15 23:09:11 2004 +0000
     6.3 @@ -120,7 +120,10 @@ static inline int clear_shadow_page(
     6.4          /* We clear L2 pages by zeroing the guest entries. */
     6.5      case PGT_l2_page_table:
     6.6          p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
     6.7 -        memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
     6.8 +        if (m->shadow_mode == SHM_full_32)
     6.9 +            memset(p, 0, ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
    6.10 +        else 
    6.11 +            memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
    6.12          unmap_domain_mem(p);
    6.13          break;
    6.14  
    6.15 @@ -433,12 +436,24 @@ void unshadow_table(unsigned long gpfn, 
    6.16      free_shadow_page(&d->exec_domain[0]->mm, &frame_table[spfn]);
    6.17  }
    6.18  
    6.19 +#ifdef CONFIG_VMX
    6.20 +void vmx_shadow_clear_state(struct mm_struct *m) 
    6.21 +{
     6.22 +    SH_VVLOG("vmx_shadow_clear_state:\n");
    6.23 +    clear_shadow_state(m);
    6.24 +}
    6.25 +#endif
    6.26 +
    6.27 +
    6.28  unsigned long shadow_l2_table( 
    6.29      struct mm_struct *m, unsigned long gpfn)
    6.30  {
    6.31      struct pfn_info *spfn_info;
    6.32      unsigned long    spfn;
    6.33 -    l2_pgentry_t    *spl2e;
    6.34 +    l2_pgentry_t    *spl2e = 0, *gpl2e;
    6.35 +    unsigned long guest_gpfn;
    6.36 +
    6.37 +    __get_machine_to_phys(m, guest_gpfn, gpfn);
    6.38  
    6.39      SH_VVLOG("shadow_l2_table( %08lx )", gpfn);
    6.40  
    6.41 @@ -451,33 +466,41 @@ unsigned long shadow_l2_table(
    6.42      perfc_incr(shadow_l2_pages);
    6.43  
    6.44      spfn = spfn_info - frame_table;
    6.45 -
    6.46 -    /* Mark pfn as being shadowed; update field to point at shadow. */
    6.47 -    set_shadow_status(m, gpfn, spfn | PSH_shadowed);
    6.48 +  /* Mark pfn as being shadowed; update field to point at shadow. */
    6.49 +    set_shadow_status(m, guest_gpfn, spfn | PSH_shadowed);
    6.50   
    6.51 -    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
    6.52 -
    6.53 -    /*
    6.54 -     * We could proactively fill in PDEs for pages that are already shadowed.
    6.55 -     * However, we tried it and it didn't help performance. This is simpler.
    6.56 -     */
    6.57 -    memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    6.58 -
    6.59  #ifdef __i386__
    6.60      /* Install hypervisor and 2x linear p.t. mapings. */
    6.61 -    memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
    6.62 -           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
    6.63 -           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    6.64 -    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.65 -        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    6.66 -    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.67 -        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    6.68 -    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.69 -        mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) |
    6.70 -                      __PAGE_HYPERVISOR);
    6.71 +    if (m->shadow_mode == SHM_full_32) 
    6.72 +        vmx_update_shadow_state(m, gpfn, spfn);
    6.73 +    else {
    6.74 +        spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
    6.75 +        // can't use the linear map as we may not be in the right PT
    6.76 +        gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
    6.77 +        /*
    6.78 +         * We could proactively fill in PDEs for pages that are already shadowed.
    6.79 +         * However, we tried it and it didn't help performance. This is simpler.
    6.80 +         */
    6.81 +        memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    6.82 +
     6.83 +        /* Install hypervisor and 2x linear p.t. mappings. */
    6.84 +        memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
    6.85 +               &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
    6.86 +               HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    6.87 +        spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.88 +            mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    6.89 +        spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.90 +            mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    6.91 +        spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
    6.92 +            mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) |
    6.93 +			  __PAGE_HYPERVISOR);
    6.94 +    }
    6.95  #endif
    6.96  
    6.97 -    unmap_domain_mem(spl2e);
    6.98 +    if (m->shadow_mode != SHM_full_32) 
    6.99 +    {                           
   6.100 +        unmap_domain_mem(spl2e);
   6.101 +    }
   6.102  
   6.103      SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
   6.104      return spfn;
   6.105 @@ -486,13 +509,13 @@ unsigned long shadow_l2_table(
   6.106  static void shadow_map_l1_into_current_l2(unsigned long va)
   6.107  { 
   6.108      struct mm_struct *m = &current->mm;
   6.109 -    unsigned long    *gpl1e, *spl1e, gpde, spde, gl1pfn, sl1pfn, sl1ss;
   6.110 +    unsigned long    *gpl1e, *spl1e, gpl2e, spl2e, gl1pfn, sl1pfn=0, sl1ss;
   6.111      struct pfn_info  *sl1pfn_info;
   6.112      int               i;
   6.113  
   6.114 -    gpde = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
   6.115 +    __guest_get_pl2e(m, va, &gpl2e);
   6.116  
   6.117 -    gl1pfn = gpde >> PAGE_SHIFT;
   6.118 +    gl1pfn = gpl2e >> PAGE_SHIFT;
   6.119  
   6.120      sl1ss = __shadow_status(m, gl1pfn);
   6.121      if ( !(sl1ss & PSH_shadowed) )
   6.122 @@ -510,11 +533,10 @@ static void shadow_map_l1_into_current_l
   6.123  
   6.124          set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);
   6.125  
   6.126 -        l2pde_general(m, &gpde, &spde, sl1pfn);
   6.127 +        l2pde_general(m, &gpl2e, &spl2e, sl1pfn);
   6.128  
   6.129 -        linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   6.130 -        shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =
   6.131 -            mk_l2_pgentry(spde);
   6.132 +        __guest_set_pl2e(m, va, gpl2e);
   6.133 +        __shadow_set_pl2e(m, va, spl2e);
   6.134  
   6.135          gpl1e = (unsigned long *) &(linear_pg_table[
   6.136              (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);
   6.137 @@ -531,13 +553,38 @@ static void shadow_map_l1_into_current_l
   6.138          SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);
   6.139  
   6.140          sl1pfn = sl1ss & PSH_pfn_mask;
   6.141 -        l2pde_general(m, &gpde, &spde, sl1pfn);
   6.142 -
   6.143 -        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   6.144 -        shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
   6.145 +        l2pde_general(m, &gpl2e, &spl2e, sl1pfn);
   6.146 +        __guest_set_pl2e(m, va, gpl2e);
   6.147 +        __shadow_set_pl2e(m, va, spl2e);
   6.148      }              
   6.149  }
   6.150  
   6.151 +#ifdef CONFIG_VMX
   6.152 +void vmx_shadow_invlpg(struct mm_struct *m, unsigned long va)
   6.153 +{
   6.154 +    unsigned long gpte, spte, host_pfn;
   6.155 +
   6.156 +    if (__put_user(0L, (unsigned long *)
   6.157 +                   &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
   6.158 +        vmx_shadow_clear_state(m);
   6.159 +        return;
   6.160 +    }
   6.161 +
   6.162 +    if (__get_user(gpte, (unsigned long *)
   6.163 +                   &linear_pg_table[va >> PAGE_SHIFT])) {
   6.164 +        return;
   6.165 +    }
   6.166 +
   6.167 +    host_pfn = phys_to_machine_mapping[gpte >> PAGE_SHIFT];
   6.168 +    spte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
   6.169 +
   6.170 +    if (__put_user(spte, (unsigned long *)
   6.171 +                   &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
   6.172 +        return;
   6.173 +    }
   6.174 +}
   6.175 +#endif
   6.176 +
   6.177  int shadow_fault(unsigned long va, long error_code)
   6.178  {
   6.179      unsigned long gpte, spte;
   6.180 @@ -718,6 +765,9 @@ static int check_pte(
   6.181      int level, int i)
   6.182  {
   6.183      unsigned long mask, gpfn, spfn;
   6.184 +#ifdef CONFIG_VMX
   6.185 +    unsigned long guest_gpfn;
   6.186 +#endif
   6.187  
   6.188      if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
   6.189          return 1;  /* always safe */
   6.190 @@ -761,8 +811,20 @@ static int check_pte(
   6.191          if ( level < 2 )
   6.192              FAIL("Shadow in L1 entry?");
   6.193  
   6.194 -        if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
   6.195 -            FAIL("spfn problem g.sf=%08lx", __shadow_status(m, gpfn));
   6.196 +        if (m->shadow_mode == SHM_full_32) {
   6.197 +
   6.198 +            guest_gpfn = phys_to_machine_mapping[gpfn];
   6.199 +
   6.200 +            if ( __shadow_status(m, guest_gpfn) != (PSH_shadowed | spfn) )
   6.201 +                FAIL("spfn problem g.sf=%08lx", 
   6.202 +                     __shadow_status(m, guest_gpfn) );
   6.203 +            
   6.204 +        } else {
   6.205 +            if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
   6.206 +                FAIL("spfn problem g.sf=%08lx", 
   6.207 +                     __shadow_status(m, gpfn) );
   6.208 +        }
   6.209 +
   6.210      }
   6.211  
   6.212      return 1;
   6.213 @@ -800,6 +862,7 @@ int check_pagetable(struct mm_struct *m,
   6.214      unsigned long gpfn, spfn;
   6.215      int           i;
   6.216      l2_pgentry_t *gpl2e, *spl2e;
   6.217 +    unsigned long host_gpfn = 0;
   6.218  
   6.219      sh_check_name = s;
   6.220  
   6.221 @@ -809,20 +872,29 @@ int check_pagetable(struct mm_struct *m,
   6.222  
   6.223      gpfn = gptbase >> PAGE_SHIFT;
   6.224  
   6.225 -    if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
   6.226 +    __get_phys_to_machine(m, host_gpfn, gpfn);
   6.227 +  
   6.228 +    if ( ! (__shadow_status(m, gpfn) & PSH_shadowed) )
   6.229      {
   6.230          printk("%s-PT %08lx not shadowed\n", s, gptbase);
   6.231 -        if ( __shadow_status(m, gpfn) != 0 )
   6.232 -            BUG();
   6.233 -        return 0;
   6.234 -    }
   6.235 +
    6.236 +        if ( __shadow_status(m, gpfn) != 0 ) BUG();
    6.237 +        return 0;
    6.238 +    }
   6.239   
   6.240      spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;
   6.241  
   6.242 -    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
   6.243 -        FAILPT("ptbase shadow inconsistent1");
    6.244 +    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
    6.245 +        FAILPT("ptbase shadow inconsistent1");
   6.246  
   6.247 -    gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
   6.248 +    if (m->shadow_mode == SHM_full_32) 
   6.249 +    {
   6.250 +        host_gpfn = phys_to_machine_mapping[gpfn];
   6.251 +        gpl2e = (l2_pgentry_t *) map_domain_mem( host_gpfn << PAGE_SHIFT );
   6.252 +
   6.253 +    } else
   6.254 +        gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
   6.255 +
   6.256      spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
   6.257  
   6.258      if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   6.259 @@ -830,7 +902,6 @@ int check_pagetable(struct mm_struct *m,
   6.260                  ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
   6.261                   DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
   6.262      {
   6.263 -        printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
   6.264          for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE; 
   6.265                i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
   6.266                i++ )
   6.267 @@ -851,11 +922,12 @@ int check_pagetable(struct mm_struct *m,
   6.268                                     L2_PAGETABLE_SHIFT]),
   6.269                 (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   6.270  
   6.271 -    if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
   6.272 -          ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | 
   6.273 +    if (m->shadow_mode != SHM_full_32) {
   6.274 +        if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
   6.275 +              ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | 
   6.276              __PAGE_HYPERVISOR))) )
   6.277 -        FAILPT("hypervisor per-domain map inconsistent");
   6.278 -
   6.279 +            FAILPT("hypervisor per-domain map inconsistent");
   6.280 +    }
   6.281  
   6.282      /* Check the whole L2. */
   6.283      for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
     7.1 --- a/xen/arch/x86/time.c	Wed Dec 15 18:19:36 2004 +0000
     7.2 +++ b/xen/arch/x86/time.c	Wed Dec 15 23:09:11 2004 +0000
     7.3 @@ -50,7 +50,7 @@ static s_time_t        stime_irq;       
     7.4  static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
     7.5  static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
     7.6  
     7.7 -static void timer_interrupt(int irq, void *dev_id, struct xen_regs *regs)
     7.8 +void timer_interrupt(int irq, void *dev_id, struct xen_regs *regs)
     7.9  {
    7.10      write_lock_irq(&time_lock);
    7.11  
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/arch/x86/vmx.c	Wed Dec 15 23:09:11 2004 +0000
     8.3 @@ -0,0 +1,913 @@
     8.4 +/*
     8.5 + * vmx.c: handling VMX architecture-related VM exits
     8.6 + * Copyright (c) 2004, Intel Corporation.
     8.7 + *
     8.8 + * This program is free software; you can redistribute it and/or modify it
     8.9 + * under the terms and conditions of the GNU General Public License,
    8.10 + * version 2, as published by the Free Software Foundation.
    8.11 + *
    8.12 + * This program is distributed in the hope it will be useful, but WITHOUT
    8.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    8.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    8.15 + * more details.
    8.16 + *
    8.17 + * You should have received a copy of the GNU General Public License along with
    8.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    8.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    8.20 + *
    8.21 + */
    8.22 +
    8.23 +#include <xen/config.h>
    8.24 +#include <xen/init.h>
    8.25 +#include <xen/lib.h>
    8.26 +#include <xen/sched.h>
    8.27 +#include <asm/current.h>
    8.28 +#include <asm/io.h>
    8.29 +#include <asm/irq.h>
    8.30 +#include <asm/shadow.h>
    8.31 +#include <asm/regs.h>
    8.32 +#include <asm/cpufeature.h>
    8.33 +#include <asm/processor.h>
    8.34 +#include <asm/types.h>
    8.35 +#include <asm/msr.h>
    8.36 +#include <asm/spinlock.h>
    8.37 +#include <asm/vmx.h>
    8.38 +#include <asm/vmx_vmcs.h>
    8.39 +#include <public/io/ioreq.h>
    8.40 +
    8.41 +int vmcs_size;
    8.42 +unsigned int opt_vmx_debug_level;
    8.43 +
    8.44 +int start_vmx()
    8.45 +{
    8.46 +    struct vmcs_struct *vmcs;
    8.47 +    unsigned long ecx;
    8.48 +    u64 phys_vmcs;      /* debugging */
    8.49 +
    8.50 +    vmcs_size = VMCS_SIZE;
    8.51 +    /*
    8.52 +     * Xen does not fill x86_capability words except 0.
    8.53 +     */
    8.54 +    ecx = cpuid_ecx(1);
    8.55 +    boot_cpu_data.x86_capability[4] = ecx;
    8.56 +
    8.57 +    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
    8.58 +        return 0;
    8.59 +
    8.60 +    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */
    8.61 +
    8.62 +    if (!(vmcs = alloc_vmcs())) {
    8.63 +        printk("Failed to allocate VMCS\n");    
    8.64 +        return 0;
    8.65 +    }
    8.66 +
    8.67 +    phys_vmcs = (u64) virt_to_phys(vmcs);
    8.68 +
    8.69 +    if (!(__vmxon(phys_vmcs))) {
    8.70 +        printk("VMXON is done\n");
    8.71 +    }
    8.72 +
    8.73 +    return 1;
    8.74 +}
    8.75 +
    8.76 +void stop_vmx()
    8.77 +{
    8.78 +    if (test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability))
    8.79 +        __vmxoff();
    8.80 +}
    8.81 +
    8.82 +/*
     8.83 + * Not all cases receive a valid value in the VM-exit instruction length field.
    8.84 + */
    8.85 +#define __get_instruction_length(len) \
    8.86 +    __vmread(INSTRUCTION_LEN, &(len)); \
    8.87 +     if ((len) < 1 || (len) > 15) \
    8.88 +        __vmx_bug(&regs);
    8.89 +
    8.90 +static void inline __update_guest_eip(unsigned long inst_len) 
    8.91 +{
    8.92 +    unsigned long current_eip;
    8.93 +
    8.94 +    __vmread(GUEST_EIP, &current_eip);
    8.95 +    __vmwrite(GUEST_EIP, current_eip + inst_len);
    8.96 +}
    8.97 +
    8.98 +
    8.99 +#include <asm/domain_page.h>
   8.100 +
   8.101 +static int vmx_do_page_fault(unsigned long va, unsigned long error_code) 
   8.102 +{
   8.103 +    unsigned long eip, pfn;
   8.104 +    unsigned int index;
   8.105 +    unsigned long gpde = 0;
   8.106 +    int result;
   8.107 +    struct exec_domain *ed = current;
   8.108 +    struct mm_struct *m = &ed->mm;
   8.109 +
   8.110 +#if VMX_DEBUG
   8.111 +    {
   8.112 +        __vmread(GUEST_EIP, &eip);
   8.113 +        VMX_DBG_LOG(DBG_LEVEL_VMMU, 
    8.114 +                "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n", 
   8.115 +                va, eip, error_code);
   8.116 +    }
   8.117 +#endif
   8.118 +    /*
    8.119 +     * Set up guest page directory cache to make linear_pg_table[] work.
   8.120 +     */
   8.121 +    __guest_get_pl2e(m, va, &gpde);
   8.122 +    if (!(gpde & _PAGE_PRESENT))
   8.123 +        return 0;
   8.124 +
   8.125 +    index = (va >> L2_PAGETABLE_SHIFT);
   8.126 +    if (!l2_pgentry_val(m->guest_pl2e_cache[index])) {
   8.127 +        pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];
   8.128 +
   8.129 +        VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
   8.130 +                pagetable_val(m->pagetable));
   8.131 +
   8.132 +        m->guest_pl2e_cache[index] = 
   8.133 +            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   8.134 +    }
   8.135 +
   8.136 +    if ((result = shadow_fault(va, error_code)))
   8.137 +        return result;
   8.138 +    
    8.139 +    return 0;       /* failed to resolve, i.e. raise #PG */
   8.140 +}
   8.141 +
   8.142 +static void vmx_do_general_protection_fault(struct xen_regs *regs) 
   8.143 +{
   8.144 +    unsigned long eip, error_code;
   8.145 +
   8.146 +    __vmread(GUEST_EIP, &eip);
   8.147 +    __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
   8.148 +
   8.149 +    VMX_DBG_LOG(DBG_LEVEL_1, 
    8.150 +            "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
   8.151 +            eip, error_code);
   8.152 +
   8.153 +    VMX_DBG_LOG(DBG_LEVEL_1, 
   8.154 +            "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
   8.155 +            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
   8.156 +
   8.157 +    __vmx_bug(regs);
   8.158 +}
   8.159 +
   8.160 +static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs) 
   8.161 +{
   8.162 +    int eax, ebx, ecx, edx;
   8.163 +    unsigned long eip;
   8.164 +
   8.165 +    __vmread(GUEST_EIP, &eip);
   8.166 +
   8.167 +    VMX_DBG_LOG(DBG_LEVEL_1, 
   8.168 +            "do_cpuid: (eax) %x, (ebx) %x, (ecx) %x, (edx) %x, (esi) %x, (edi) %x\n", regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
   8.169 +
   8.170 +    cpuid(input, &eax, &ebx, &ecx, &edx);
   8.171 +
   8.172 +    if (input == 1) {
   8.173 +        clear_bit(X86_FEATURE_PSE, &edx);
   8.174 +        clear_bit(X86_FEATURE_PAE, &edx);
   8.175 +        clear_bit(X86_FEATURE_PSE36, &edx);
   8.176 +    }
   8.177 +
   8.178 +    regs->eax = (unsigned long) eax;
   8.179 +    regs->ebx = (unsigned long) ebx;
   8.180 +    regs->ecx = (unsigned long) ecx;
   8.181 +    regs->edx = (unsigned long) edx;
   8.182 +
   8.183 +    VMX_DBG_LOG(DBG_LEVEL_1, 
   8.184 +            "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n", 
   8.185 +            eip, input, eax, ebx, ecx, edx);
   8.186 +
   8.187 +}
   8.188 +
   8.189 +#define CASE_GET_REG_P(REG, reg)    \
   8.190 +    case REG_ ## REG: reg_p = &(regs->reg); break
   8.191 +
   8.192 +static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
   8.193 +{
   8.194 +    unsigned int reg;
   8.195 +    u32 *reg_p = 0;
   8.196 +    struct exec_domain *ed = current;
   8.197 +    u32 eip;
   8.198 +
   8.199 +    __vmread(GUEST_EIP, &eip);
   8.200 +
   8.201 +    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
   8.202 +
   8.203 +    VMX_DBG_LOG(DBG_LEVEL_1, 
   8.204 +                "vmx_dr_access : eip=%08x, reg=%d, exit_qualification = %lx\n",
   8.205 +                eip, reg, exit_qualification);
   8.206 +
   8.207 +    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
   8.208 +        CASE_GET_REG_P(EAX, eax);
   8.209 +        CASE_GET_REG_P(ECX, ecx);
   8.210 +        CASE_GET_REG_P(EDX, edx);
   8.211 +        CASE_GET_REG_P(EBX, ebx);
   8.212 +        CASE_GET_REG_P(EBP, ebp);
   8.213 +        CASE_GET_REG_P(ESI, esi);
   8.214 +        CASE_GET_REG_P(EDI, edi);
   8.215 +    case REG_ESP:
   8.216 +        break;  
   8.217 +    default:
   8.218 +        __vmx_bug(regs);
   8.219 +    }
   8.220 +        
   8.221 +    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
   8.222 +    case TYPE_MOV_TO_DR: 
   8.223 +        /* don't need to check the range */
   8.224 +        if (reg != REG_ESP)
   8.225 +            ed->thread.debugreg[reg] = *reg_p; 
   8.226 +        else {
   8.227 +            unsigned long value;
   8.228 +            __vmread(GUEST_ESP, &value);
   8.229 +            ed->thread.debugreg[reg] = value;
   8.230 +        }
   8.231 +        break;
   8.232 +    case TYPE_MOV_FROM_DR:
   8.233 +        if (reg != REG_ESP)
   8.234 +            *reg_p = ed->thread.debugreg[reg];
   8.235 +        else {
   8.236 +            __vmwrite(GUEST_ESP, ed->thread.debugreg[reg]);
   8.237 +        }
   8.238 +        break;
   8.239 +    }
   8.240 +}
   8.241 +
   8.242 +/*
    8.243 + * Invalidate the TLB entry for va, and invalidate the shadow page
    8.244 + * corresponding to the address va.
   8.245 + */
   8.246 +static void vmx_vmexit_do_invlpg(unsigned long va) 
   8.247 +{
   8.248 +    unsigned long eip;
   8.249 +    struct exec_domain *d = current;
   8.250 +    unsigned int index;
   8.251 +
   8.252 +    __vmread(GUEST_EIP, &eip);
   8.253 +
   8.254 +    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
   8.255 +            eip, va);
   8.256 +
   8.257 +    /*
   8.258 +     * We do the safest things first, then try to update the shadow
    8.259 +     * by copying from the guest.
   8.260 +     */
   8.261 +    vmx_shadow_invlpg(&d->mm, va);
   8.262 +    index = (va >> L2_PAGETABLE_SHIFT);
   8.263 +    d->mm.guest_pl2e_cache[index] = mk_l2_pgentry(0); /* invalidate pgd cache */
   8.264 +}
   8.265 +
   8.266 +static inline void guest_pl2e_cache_invalidate(struct mm_struct *m) 
   8.267 +{
   8.268 +    /*
   8.269 +     * Need to optimize this
   8.270 +     */
   8.271 +    memset(m->guest_pl2e_cache, 0, PAGE_SIZE);
   8.272 +}
   8.273 +
   8.274 +static inline unsigned long gva_to_gpa(unsigned long gva)
   8.275 +{
   8.276 +    unsigned long gpde, gpte, pfn, index;
   8.277 +    struct exec_domain *d = current;
   8.278 +    struct mm_struct *m = &d->mm;
   8.279 +
   8.280 +    __guest_get_pl2e(m, gva, &gpde);
   8.281 +    index = (gva >> L2_PAGETABLE_SHIFT);
   8.282 +
   8.283 +    pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];
   8.284 +
   8.285 +    m->guest_pl2e_cache[index] = 
   8.286 +            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   8.287 +
   8.288 +    if ( unlikely(__get_user(gpte, (unsigned long *)
   8.289 +                             &linear_pg_table[gva >> PAGE_SHIFT])) )
   8.290 +    {
   8.291 +        printk("gva_to_gpa EXIT: read gpte faulted" );
   8.292 +        return 0;
   8.293 +    }
   8.294 +
   8.295 +    if ( !(gpte & _PAGE_PRESENT) )
   8.296 +    {
   8.297 +        printk("gva_to_gpa - EXIT: gpte not present (%lx)",gpte );
   8.298 +        return 0;
   8.299 +    }
   8.300 +
   8.301 +    return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK); 
   8.302 +}
   8.303 +
   8.304 +static void vmx_io_instruction(struct xen_regs *regs, 
   8.305 +                   unsigned long exit_qualification, unsigned long inst_len) 
   8.306 +{
   8.307 +    struct exec_domain *d = current;
   8.308 +    vcpu_iodata_t *vio;
   8.309 +    ioreq_t *p;
   8.310 +    unsigned long addr;
   8.311 +    unsigned long eip;
   8.312 +
   8.313 +    extern long evtchn_send(int lport);
   8.314 +    extern long do_block(void);
   8.315 +
   8.316 +    __vmread(GUEST_EIP, &eip);
   8.317 +
   8.318 +    VMX_DBG_LOG(DBG_LEVEL_1, 
   8.319 +            "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
   8.320 +            eip, exit_qualification);
   8.321 +
   8.322 +    if (test_bit(6, &exit_qualification))
   8.323 +        addr = (exit_qualification >> 16) & (0xffff);
   8.324 +    else
   8.325 +        addr = regs->edx & 0xffff;
   8.326 +
   8.327 +    if (addr == 0x80) {
   8.328 +        __update_guest_eip(inst_len);
   8.329 +        return;
   8.330 +    }
   8.331 +
   8.332 +    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
   8.333 +    if (vio == 0) {
   8.334 +        VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
   8.335 +        domain_crash(); 
   8.336 +    }
   8.337 +    p = &vio->vp_ioreq;
   8.338 +    p->dir = test_bit(3, &exit_qualification);  
   8.339 +    set_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags);
   8.340 +
   8.341 +    p->pdata_valid = 0;
   8.342 +    p->count = 1;
   8.343 +    p->size = (exit_qualification & 7) + 1;
   8.344 +
   8.345 +    if (test_bit(4, &exit_qualification)) {
   8.346 +        p->pdata_valid = 1;
   8.347 +        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
   8.348 +            regs->esi
   8.349 +            : regs->edi);
   8.350 +        p->u.pdata = (void *) gva_to_gpa(p->u.data);
   8.351 +        if (test_bit(5, &exit_qualification))
   8.352 +            p->count = regs->ecx;
   8.353 +        if ((p->u.data & PAGE_MASK) != 
   8.354 +            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
   8.355 +            printk("stringio crosses page boundary!\n");
   8.356 +            if (p->u.data & (p->size - 1)) {
   8.357 +                printk("Not aligned I/O!\n");
   8.358 +                domain_crash();     
   8.359 +            }
   8.360 +            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
   8.361 +        } else {
   8.362 +            __update_guest_eip(inst_len);
   8.363 +        }
   8.364 +    } else if (p->dir == IOREQ_WRITE) {
   8.365 +        p->u.data = regs->eax;
   8.366 +        __update_guest_eip(inst_len);
   8.367 +    } else
   8.368 +        __update_guest_eip(inst_len);
   8.369 +
   8.370 +    p->addr = addr;
   8.371 +    p->port_mm = 0;
   8.372 +    p->state = STATE_IOREQ_READY;
   8.373 +    evtchn_send(IOPACKET_PORT);
   8.374 +    do_block();
   8.375 +}
   8.376 +
   8.377 +#define CASE_GET_REG(REG, reg)  \
   8.378 +    case REG_ ## REG: value = regs->reg; break
   8.379 +
   8.380 +/*
   8.381 + * Write to control registers
   8.382 + */
   8.383 +static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
   8.384 +{
   8.385 +    unsigned long value;
   8.386 +    unsigned long old_cr;
   8.387 +    struct exec_domain *d = current;
   8.388 +
   8.389 +    switch (gp) {
   8.390 +        CASE_GET_REG(EAX, eax);
   8.391 +        CASE_GET_REG(ECX, ecx);
   8.392 +        CASE_GET_REG(EDX, edx);
   8.393 +        CASE_GET_REG(EBX, ebx);
   8.394 +        CASE_GET_REG(EBP, ebp);
   8.395 +        CASE_GET_REG(ESI, esi);
   8.396 +        CASE_GET_REG(EDI, edi);
   8.397 +    case REG_ESP:
   8.398 +        __vmread(GUEST_ESP, &value);
   8.399 +        break;
   8.400 +    default:
   8.401 +        printk("invalid gp: %d\n", gp);
   8.402 +        __vmx_bug(regs);
   8.403 +    }
   8.404 +    
   8.405 +    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, \n", cr, value);
   8.406 +    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx, \n", (unsigned long) current);
   8.407 +
   8.408 +    switch(cr) {
   8.409 +    case 0: 
   8.410 +    {
   8.411 +        unsigned long old_base_pfn = 0, pfn;
   8.412 +
   8.413 +        /* 
   8.414 +         * CR0:
   8.415 +         * We don't want to lose PE and PG.
   8.416 +         */
   8.417 +        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
   8.418 +        __vmwrite(CR0_READ_SHADOW, value);
   8.419 +
   8.420 +        if (value & (X86_CR0_PE | X86_CR0_PG) &&
   8.421 +            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
   8.422 +            /*
   8.423 +             * Enable paging
   8.424 +             */
   8.425 +            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
   8.426 +            /*
   8.427 +             * The guest CR3 must be pointing to the guest physical.
   8.428 +             */
   8.429 +            if (!(pfn = phys_to_machine_mapping[
   8.430 +                      d->thread.arch_vmx.cpu_cr3 >> PAGE_SHIFT])) 
   8.431 +            {
   8.432 +                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n", 
   8.433 +                        d->thread.arch_vmx.cpu_cr3);
   8.434 +                domain_crash(); /* need to take a clean path */
   8.435 +            }
   8.436 +            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
   8.437 +            /*
   8.438 +             * Now mm.pagetable points to machine physical.
   8.439 +             */
   8.440 +            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   8.441 +
   8.442 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n", 
   8.443 +                    (unsigned long) (pfn << PAGE_SHIFT));
   8.444 +
   8.445 +            shadow_lock(&d->mm);
   8.446 +            shadow_mode_enable(d->domain, SHM_full_32); 
   8.447 +            shadow_unlock(&d->mm);
   8.448 +
   8.449 +            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
   8.450 +            /* 
   8.451 +             * mm->shadow_table should hold the next CR3 for shadow
   8.452 +             */
   8.453 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n", 
   8.454 +                    d->thread.arch_vmx.cpu_cr3, pfn);
   8.455 +            put_page_and_type(&frame_table[old_base_pfn]);
   8.456 +
   8.457 +        }
   8.458 +        break;
   8.459 +    }
   8.460 +    case 3: 
   8.461 +    {
   8.462 +        unsigned long pfn;
   8.463 +
   8.464 +        /*
    8.465 +         * If paging is not enabled yet, simply copy the value to cpu_cr3.
   8.466 +         */
   8.467 +        if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
   8.468 +            d->thread.arch_vmx.cpu_cr3 = value;
   8.469 +            return;
   8.470 +        }
   8.471 +        
   8.472 +        guest_pl2e_cache_invalidate(&d->mm);
   8.473 +        /*
   8.474 +         * We make a new one if the shadow does not exist.
   8.475 +         */
   8.476 +        if (value == d->thread.arch_vmx.cpu_cr3) {
   8.477 +            /* 
   8.478 +             * This is simple TLB flush, implying the guest has 
   8.479 +             * removed some translation or changed page attributes.
   8.480 +             * We simply invalidate the shadow.
   8.481 +             */
   8.482 +            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
   8.483 +            if ((pfn << PAGE_SHIFT) != pagetable_val(d->mm.pagetable))
   8.484 +                __vmx_bug(regs);
   8.485 +            vmx_shadow_clear_state(&d->mm);
   8.486 +            shadow_invalidate(&d->mm);
   8.487 +        } else {
   8.488 +            /*
   8.489 +             * If different, make a shadow. Check if the PDBR is valid
   8.490 +             * first.
   8.491 +             */
   8.492 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
   8.493 +            if ((value >> PAGE_SHIFT) > d->domain->max_pages)
   8.494 +            {
   8.495 +                VMX_DBG_LOG(DBG_LEVEL_VMMU, 
   8.496 +                        "Invalid CR3 value=%lx\n", value);
   8.497 +                domain_crash(); /* need to take a clean path */
   8.498 +            }
   8.499 +            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
   8.500 +            vmx_shadow_clear_state(&d->mm);
   8.501 +            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   8.502 +            shadow_mk_pagetable(&d->mm);
   8.503 +            /* 
   8.504 +             * mm->shadow_table should hold the next CR3 for shadow
   8.505 +             */
   8.506 +            d->thread.arch_vmx.cpu_cr3 = value;
   8.507 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n", 
   8.508 +                    value);
   8.509 +            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
   8.510 +        }
   8.511 +        break;
   8.512 +    }
   8.513 +    case 4:         
   8.514 +        /* CR4 */
   8.515 +        if (value & X86_CR4_PAE)
   8.516 +            __vmx_bug(regs);    /* not implemented */
   8.517 +        __vmread(CR4_READ_SHADOW, &old_cr);
   8.518 +        
   8.519 +        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
   8.520 +        __vmwrite(CR4_READ_SHADOW, value);
   8.521 +
   8.522 +        /*
   8.523 +         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
   8.524 +         * all TLB entries except global entries.
   8.525 +         */
   8.526 +        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
   8.527 +            vmx_shadow_clear_state(&d->mm);
   8.528 +            shadow_invalidate(&d->mm);
   8.529 +            guest_pl2e_cache_invalidate(&d->mm);
   8.530 +        }
   8.531 +        break;
   8.532 +    default:
    8.533 +        printk("invalid cr: %d\n", cr);
   8.534 +        __vmx_bug(regs);
   8.535 +    }
   8.536 +}   
   8.537 +
   8.538 +#define CASE_SET_REG(REG, reg)      \
   8.539 +    case REG_ ## REG:       \
   8.540 +    regs->reg = value;      \
   8.541 +    break
   8.542 +
   8.543 +/*
   8.544 + * Read from control registers. CR0 and CR4 are read from the shadow.
   8.545 + */
   8.546 +static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
   8.547 +{
   8.548 +    unsigned long value;
   8.549 +    struct exec_domain *d = current;
   8.550 +
   8.551 +    if (cr != 3)
   8.552 +        __vmx_bug(regs);
   8.553 +
   8.554 +    value = (unsigned long) d->thread.arch_vmx.cpu_cr3;
   8.555 +    ASSERT(value);
   8.556 +
   8.557 +    switch (gp) {
   8.558 +        CASE_SET_REG(EAX, eax);
   8.559 +        CASE_SET_REG(ECX, ecx);
   8.560 +        CASE_SET_REG(EDX, edx);
   8.561 +        CASE_SET_REG(EBX, ebx);
   8.562 +        CASE_SET_REG(EBP, ebp);
   8.563 +        CASE_SET_REG(ESI, esi);
   8.564 +        CASE_SET_REG(EDI, edi);
   8.565 +    case REG_ESP:
   8.566 +        __vmwrite(GUEST_ESP, value);
   8.567 +        regs->esp = value;
   8.568 +        break;
   8.569 +    default:
   8.570 +        printk("invalid gp: %d\n", gp);
   8.571 +        __vmx_bug(regs);
   8.572 +    }
   8.573 +
   8.574 +    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx, \n", cr, value);
   8.575 +}
   8.576 +
   8.577 +static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
   8.578 +{
   8.579 +    unsigned int gp, cr;
   8.580 +    unsigned long value;
   8.581 +
   8.582 +    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
   8.583 +    case TYPE_MOV_TO_CR:
   8.584 +        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
   8.585 +        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
   8.586 +        mov_to_cr(gp, cr, regs);
   8.587 +        break;
   8.588 +    case TYPE_MOV_FROM_CR:
   8.589 +        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
   8.590 +        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
   8.591 +        mov_from_cr(cr, gp, regs);
   8.592 +        break;
   8.593 +    case TYPE_CLTS:
   8.594 +        __vmread(GUEST_CR0, &value);
   8.595 +        value &= ~X86_CR0_TS; /* clear TS */
   8.596 +        __vmwrite(GUEST_CR0, value);
   8.597 +
   8.598 +        __vmread(CR0_READ_SHADOW, &value);
   8.599 +        value &= ~X86_CR0_TS; /* clear TS */
   8.600 +        __vmwrite(CR0_READ_SHADOW, value);
   8.601 +        break;
   8.602 +    default:
   8.603 +        __vmx_bug(regs);
   8.604 +        break;
   8.605 +    }
   8.606 +}
   8.607 +
   8.608 +static inline void vmx_do_msr_read(struct xen_regs *regs)
   8.609 +{
   8.610 +    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%x, eax=%x, edx=%x",
   8.611 +            regs->ecx, regs->eax, regs->edx);
   8.612 +
   8.613 +    rdmsr(regs->ecx, regs->eax, regs->edx);
   8.614 +
   8.615 +    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: ecx=%x, eax=%x, edx=%x",
   8.616 +            regs->ecx, regs->eax, regs->edx);
   8.617 +}
   8.618 +
   8.619 +/*
    8.620 + * Need to use this exit to reschedule.
   8.621 + */
   8.622 +static inline void vmx_vmexit_do_hlt()
   8.623 +{
   8.624 +    extern long do_block(void);
   8.625 +#if VMX_DEBUG
   8.626 +    unsigned long eip;
   8.627 +    __vmread(GUEST_EIP, &eip);
   8.628 +#endif
   8.629 +    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
   8.630 +    __enter_scheduler();
   8.631 +}
   8.632 +
   8.633 +static inline void vmx_vmexit_do_mwait()
   8.634 +{
   8.635 +#if VMX_DEBUG
   8.636 +    unsigned long eip;
   8.637 +    __vmread(GUEST_EIP, &eip);
   8.638 +#endif
   8.639 +    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
   8.640 +    __enter_scheduler();
   8.641 +}
   8.642 +
   8.643 +#define BUF_SIZ     256
   8.644 +#define MAX_LINE    80
   8.645 +char print_buf[BUF_SIZ];
   8.646 +static int index;
   8.647 +
   8.648 +static void vmx_print_line(const char c, struct exec_domain *d) 
   8.649 +{
   8.650 +
   8.651 +    if (index == MAX_LINE || c == '\n') {
   8.652 +        if (index == MAX_LINE) {
   8.653 +            print_buf[index++] = c;
   8.654 +        }
   8.655 +        print_buf[index] = '\0';
   8.656 +        printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
   8.657 +        index = 0;
   8.658 +    }
   8.659 +    else
   8.660 +        print_buf[index++] = c;
   8.661 +}
   8.662 +
   8.663 +#ifdef XEN_DEBUGGER
   8.664 +void save_xen_regs(struct xen_regs *regs)
   8.665 +{
   8.666 +    __vmread(GUEST_SS_SELECTOR, &regs->xss);
   8.667 +    __vmread(GUEST_ESP, &regs->esp);
   8.668 +    __vmread(GUEST_EFLAGS, &regs->eflags);
   8.669 +    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
   8.670 +    __vmread(GUEST_EIP, &regs->eip);
   8.671 +
   8.672 +    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
   8.673 +    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
   8.674 +    __vmread(GUEST_ES_SELECTOR, &regs->xes);
   8.675 +    __vmread(GUEST_DS_SELECTOR, &regs->xds);
   8.676 +}
   8.677 +
   8.678 +void restore_xen_regs(struct xen_regs *regs)
   8.679 +{
   8.680 +    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
   8.681 +    __vmwrite(GUEST_ESP, regs->esp);
   8.682 +    __vmwrite(GUEST_EFLAGS, regs->eflags);
   8.683 +    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
   8.684 +    __vmwrite(GUEST_EIP, regs->eip);
   8.685 +
   8.686 +    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
   8.687 +    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
   8.688 +    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
   8.689 +    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
   8.690 +}
   8.691 +#endif
   8.692 +
   8.693 +asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
   8.694 +{
   8.695 +    unsigned int exit_reason, idtv_info_field;
   8.696 +    unsigned long exit_qualification, eip, inst_len = 0;
   8.697 +    struct exec_domain *d = current;
   8.698 +    int error;
   8.699 +
   8.700 +    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
   8.701 +        __vmx_bug(&regs);
   8.702 +    
   8.703 +    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
   8.704 +    if (idtv_info_field & INTR_INFO_VALID_MASK) {
   8.705 +        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
   8.706 +        if ((idtv_info_field & 0xff) == 14) {
   8.707 +            unsigned long error_code;
   8.708 +
   8.709 +            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
   8.710 +            printk("#PG error code: %lx\n", error_code);
   8.711 +        }
   8.712 +        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n", 
   8.713 +                idtv_info_field);
   8.714 +    }
   8.715 +
    8.716 +    /* don't bother logging hardware interrupts */
   8.717 +    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
   8.718 +        exit_reason != EXIT_REASON_VMCALL &&
   8.719 +        exit_reason != EXIT_REASON_IO_INSTRUCTION)
   8.720 +        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);
   8.721 +
   8.722 +    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
   8.723 +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
   8.724 +        __vmread(GUEST_EIP, &eip);
   8.725 +        domain_crash();         
   8.726 +        return;
   8.727 +    }
   8.728 +
   8.729 +    switch (exit_reason) {
   8.730 +    case EXIT_REASON_EXCEPTION_NMI:
   8.731 +    {
   8.732 +#define VECTOR_DB   1
   8.733 +#define VECTOR_BP   3
   8.734 +#define VECTOR_GP   13
   8.735 +#define VECTOR_PG   14
   8.736 +
    8.737 +        /*
    8.738 +         * Software-interrupt (INT n) exiting is not enabled, so we
    8.739 +         * can get here only because of (1) an exception (e.g. #PG)
    8.740 +         * raised in the guest, or (2) an NMI.
    8.741 +         */
   8.742 +        int error;
   8.743 +        unsigned int vector;
   8.744 +        unsigned long va;
   8.745 +        unsigned long error_code;
   8.746 +
    8.747 +        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
    8.748 +            || !(vector & INTR_INFO_VALID_MASK))
   8.749 +            __vmx_bug(&regs);
   8.750 +        vector &= 0xff;
   8.751 +
   8.752 +        switch (vector) {
   8.753 +#ifdef XEN_DEBUGGER
   8.754 +        case VECTOR_DB:
   8.755 +        {
   8.756 +            save_xen_regs(&regs);
   8.757 +            pdb_handle_exception(1, &regs, 1);
   8.758 +            restore_xen_regs(&regs);
   8.759 +            break;
   8.760 +        }
   8.761 +        case VECTOR_BP:
   8.762 +        {
   8.763 +            save_xen_regs(&regs);
   8.764 +            pdb_handle_exception(3, &regs, 1);
   8.765 +            restore_xen_regs(&regs);
   8.766 +            break;
   8.767 +        }
   8.768 +#endif
   8.769 +        case VECTOR_GP:
   8.770 +        {
   8.771 +            vmx_do_general_protection_fault(&regs);
   8.772 +            break;  
   8.773 +        }
   8.774 +        case VECTOR_PG:
   8.775 +        {
   8.776 +            __vmread(EXIT_QUALIFICATION, &va);
   8.777 +            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
   8.778 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, 
   8.779 +                    "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n", regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi, regs.edi);
   8.780 +
   8.781 +            if (!(error = vmx_do_page_fault(va, error_code))) {
   8.782 +                /*
   8.783 +                 * Inject #PG using Interruption-Information Fields
   8.784 +                 */
   8.785 +                unsigned long intr_fields;
   8.786 +
   8.787 +                intr_fields = (INTR_INFO_VALID_MASK | 
   8.788 +                           INTR_TYPE_EXCEPTION |
   8.789 +                           INTR_INFO_DELIEVER_CODE_MASK |
   8.790 +                           VECTOR_PG);
   8.791 +                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
   8.792 +                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
   8.793 +                d->thread.arch_vmx.cpu_cr2 = va;
   8.794 +            }
   8.795 +            break;
   8.796 +        }
   8.797 +        default:
   8.798 +            __vmx_bug(&regs);
   8.799 +            break;
   8.800 +        }
   8.801 +        break;
   8.802 +    }
   8.803 +    case EXIT_REASON_EXTERNAL_INTERRUPT: 
   8.804 +    {
   8.805 +        extern int vector_irq[];
   8.806 +        extern asmlinkage void do_IRQ(struct xen_regs);
   8.807 +        extern void smp_apic_timer_interrupt(struct xen_regs *);
   8.808 +        extern void timer_interrupt(int, void *, struct xen_regs *);
   8.809 +        unsigned int    vector;
   8.810 +
    8.811 +        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
    8.812 +            || !(vector & INTR_INFO_VALID_MASK))
   8.813 +            __vmx_bug(&regs);
   8.814 +
   8.815 +        vector &= 0xff;
   8.816 +        local_irq_disable();
   8.817 +
   8.818 +        if (vector == LOCAL_TIMER_VECTOR) {
   8.819 +            smp_apic_timer_interrupt(&regs);
   8.820 +        } else {
   8.821 +            regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
   8.822 +                     0 : vector_irq[vector]);
   8.823 +            do_IRQ(regs);
   8.824 +        }
   8.825 +        break;
   8.826 +    }
   8.827 +    case EXIT_REASON_PENDING_INTERRUPT:
   8.828 +        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, 
   8.829 +              MONITOR_CPU_BASED_EXEC_CONTROLS);
   8.830 +        vmx_intr_assist(d);
   8.831 +        break;
   8.832 +    case EXIT_REASON_TASK_SWITCH:
   8.833 +        __vmx_bug(&regs);
   8.834 +        break;
   8.835 +    case EXIT_REASON_CPUID:
   8.836 +        __get_instruction_length(inst_len);
   8.837 +        vmx_vmexit_do_cpuid(regs.eax, &regs);
   8.838 +        __update_guest_eip(inst_len);
   8.839 +        break;
   8.840 +    case EXIT_REASON_HLT:
   8.841 +        __get_instruction_length(inst_len);
   8.842 +        __update_guest_eip(inst_len);
   8.843 +        vmx_vmexit_do_hlt();
   8.844 +        break;
   8.845 +    case EXIT_REASON_INVLPG:
   8.846 +    {
   8.847 +        unsigned long   va;
   8.848 +
   8.849 +        __vmread(EXIT_QUALIFICATION, &va);
   8.850 +        vmx_vmexit_do_invlpg(va);
   8.851 +        __get_instruction_length(inst_len);
   8.852 +        __update_guest_eip(inst_len);
   8.853 +        break;
   8.854 +    }
   8.855 +    case EXIT_REASON_VMCALL:
   8.856 +        __get_instruction_length(inst_len);
   8.857 +        __vmread(GUEST_EIP, &eip);
   8.858 +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
   8.859 +
   8.860 +        vmx_print_line(regs.eax, d); /* provides the current domain */
   8.861 +        __update_guest_eip(inst_len);
   8.862 +        break;
   8.863 +    case EXIT_REASON_CR_ACCESS:
   8.864 +    {
   8.865 +        __vmread(GUEST_EIP, &eip);
   8.866 +        __get_instruction_length(inst_len);
   8.867 +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
   8.868 +
    8.869 +        VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len = %lx, exit_qualification = %lx\n", 
   8.870 +                eip, inst_len, exit_qualification);
   8.871 +        vmx_cr_access(exit_qualification, &regs);
   8.872 +        __update_guest_eip(inst_len);
   8.873 +        break;
   8.874 +    }
   8.875 +    case EXIT_REASON_DR_ACCESS:
   8.876 +        __vmread(EXIT_QUALIFICATION, &exit_qualification);  
   8.877 +        vmx_dr_access(exit_qualification, &regs);
   8.878 +        __get_instruction_length(inst_len);
   8.879 +        __update_guest_eip(inst_len);
   8.880 +        break;
   8.881 +    case EXIT_REASON_IO_INSTRUCTION:
   8.882 +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
   8.883 +        __get_instruction_length(inst_len);
   8.884 +        vmx_io_instruction(&regs, exit_qualification, inst_len);
   8.885 +        break;
   8.886 +    case EXIT_REASON_MSR_READ:
   8.887 +        __get_instruction_length(inst_len);
   8.888 +        vmx_do_msr_read(&regs);
   8.889 +        __update_guest_eip(inst_len);
   8.890 +        break;
   8.891 +    case EXIT_REASON_MSR_WRITE:
   8.892 +        __vmread(GUEST_EIP, &eip);
   8.893 +        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08x, edx=%08x",
   8.894 +                eip, regs.eax, regs.edx);
    8.895 +        /* MSR writes are simply ignored for now */
   8.896 +        __get_instruction_length(inst_len);
   8.897 +        __update_guest_eip(inst_len);
   8.898 +        break;
   8.899 +    case EXIT_REASON_MWAIT_INSTRUCTION:
   8.900 +        __get_instruction_length(inst_len);
   8.901 +        __update_guest_eip(inst_len);
   8.902 +        vmx_vmexit_do_mwait();
   8.903 +        break;
   8.904 +    default:
   8.905 +        __vmx_bug(&regs);       /* should not happen */
   8.906 +    }
   8.907 +    return;
   8.908 +}
   8.909 +
   8.910 +asmlinkage void load_cr2(void)
   8.911 +{
   8.912 +    struct exec_domain *d = current;
   8.913 +
   8.914 +    local_irq_disable();        
   8.915 +    asm volatile("movl %0,%%cr2": :"r" (d->thread.arch_vmx.cpu_cr2));
   8.916 +}
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/arch/x86/vmx_io.c	Wed Dec 15 23:09:11 2004 +0000
     9.3 @@ -0,0 +1,234 @@
     9.4 +/*
      9.5 + * vmx_io.c: handling of I/O and interrupts related to VMX entry/exit
     9.6 + * Copyright (c) 2004, Intel Corporation.
     9.7 + *
     9.8 + * This program is free software; you can redistribute it and/or modify it
     9.9 + * under the terms and conditions of the GNU General Public License,
    9.10 + * version 2, as published by the Free Software Foundation.
    9.11 + *
    9.12 + * This program is distributed in the hope it will be useful, but WITHOUT
    9.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    9.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    9.15 + * more details.
    9.16 + *
    9.17 + * You should have received a copy of the GNU General Public License along with
    9.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    9.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    9.20 + *
    9.21 + */
    9.22 +#include <xen/config.h>
    9.23 +#include <xen/init.h>
    9.24 +#include <xen/mm.h>
    9.25 +#include <xen/lib.h>
    9.26 +#include <xen/errno.h>
    9.27 +
    9.28 +#include <asm/cpufeature.h>
    9.29 +#include <asm/processor.h>
    9.30 +#include <asm/msr.h>
    9.31 +#include <asm/vmx.h>
    9.32 +#include <asm/vmx_vmcs.h>
    9.33 +#include <xen/event.h>
    9.34 +#include <public/io/ioreq.h>
    9.35 +
    9.36 +void vmx_io_assist(struct exec_domain *ed) 
    9.37 +{
    9.38 +    vcpu_iodata_t *vio;
    9.39 +    ioreq_t *p;
    9.40 +    struct domain *d = ed->domain;
    9.41 +    execution_context_t *ec = get_execution_context();
    9.42 +    unsigned long old_eax;
    9.43 +    extern long do_block();
    9.44 +    unsigned long eflags;
    9.45 +    int dir;
    9.46 +
    9.47 +    /* clear the pending event */
    9.48 +    ed->vcpu_info->evtchn_upcall_pending = 0;
    9.49 +    /* clear the pending bit for port 2 */
    9.50 +    clear_bit(IOPACKET_PORT>>5, &ed->vcpu_info->evtchn_pending_sel);
    9.51 +    clear_bit(IOPACKET_PORT, &d->shared_info->evtchn_pending[0]);
    9.52 +
    9.53 +    vio = (vcpu_iodata_t *) ed->thread.arch_vmx.vmx_platform.shared_page_va;
    9.54 +    if (vio == 0) {
    9.55 +        VMX_DBG_LOG(DBG_LEVEL_1, 
    9.56 +                    "bad shared page: %lx\n", (unsigned long) vio);
    9.57 +        domain_crash();
    9.58 +    }
    9.59 +    p = &vio->vp_ioreq;
    9.60 +    /* clear IO wait VMX flag */
    9.61 +    if (test_bit(ARCH_VMX_IO_WAIT, &ed->thread.arch_vmx.flags)) {
    9.62 +        if (p->state != STATE_IORESP_READY) {
     9.63 +            printk("got a false I/O response\n");
    9.64 +            do_block();
    9.65 +        } else {
    9.66 +            p->state = STATE_INVALID;
    9.67 +        }
    9.68 +        clear_bit(ARCH_VMX_IO_WAIT, &ed->thread.arch_vmx.flags);
    9.69 +    } else {
    9.70 +        return;
    9.71 +    }
    9.72 +
    9.73 +    __vmread(GUEST_EFLAGS, &eflags);
    9.74 +    dir = (eflags & X86_EFLAGS_DF);
    9.75 +
    9.76 +    if (p->dir == IOREQ_WRITE) {
    9.77 +        if (p->pdata_valid) {
    9.78 +            if (!dir)
    9.79 +                ec->esi += p->count * p->size;
    9.80 +            else
    9.81 +                ec->esi -= p->count * p->size;
    9.82 +            ec->ecx -= p->count;
    9.83 +        }
    9.84 +        return;
    9.85 +    } else {
    9.86 +        if (p->pdata_valid) {
    9.87 +            if (!dir)
    9.88 +                ec->edi += p->count * p->size;
    9.89 +            else
    9.90 +                ec->edi -= p->count * p->size;
    9.91 +            ec->ecx -= p->count;
    9.92 +            return;
    9.93 +        }
    9.94 +    }
    9.95 +
    9.96 +    old_eax = ec->eax;
    9.97 +
    9.98 +    switch(p->size) {
    9.99 +    case 1:
   9.100 +        ec->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
   9.101 +        break;
   9.102 +    case 2:
   9.103 +        ec->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
   9.104 +        break;
   9.105 +    case 4:
   9.106 +        ec->eax = (p->u.data & 0xffffffff);
   9.107 +        break;
   9.108 +    default:
   9.109 +        BUG();
   9.110 +    }
   9.111 +}
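
The other half of this handshake runs in the device model, which picks the request out of the shared page, performs the access and flips the state field that vmx_io_assist() checks. A sketch of that consumer (state and field names assumed from ioreq.h in this changeset; emulate_port_read/write are hypothetical helpers):

    /* Hypothetical device-model side of the ioreq_t protocol. */
    void handle_ioreq(ioreq_t *p)
    {
        if (p->state != STATE_IOREQ_READY)
            return;                    /* no request outstanding */
        if (p->dir == IOREQ_READ)
            p->u.data = emulate_port_read(p->addr, p->size);
        else
            emulate_port_write(p->addr, p->size, p->u.data);
        p->state = STATE_IORESP_READY; /* consumed by vmx_io_assist() */
    }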
   9.112 +
   9.113 +static inline int __fls(unsigned long word)
   9.114 +{
   9.115 +    int bit;
   9.116 +
   9.117 +    __asm__("bsrl %1,%0"
   9.118 +            :"=r" (bit)
   9.119 +            :"rm" (word));
   9.120 +    return word ? bit : -1;
   9.121 +}
   9.122 +
   9.123 +
    9.124 +/* Simple-minded local APIC priority implementation. Fix later. */
   9.125 +static __inline__ int find_highest_irq(unsigned long *pintr)
   9.126 +{
   9.127 +    if (pintr[7])
   9.128 +        return __fls(pintr[7]) + (256-32*1);
   9.129 +    if (pintr[6])
   9.130 +        return __fls(pintr[6]) + (256-32*2);
   9.131 +    if (pintr[5])
   9.132 +        return __fls(pintr[5]) + (256-32*3);
   9.133 +    if (pintr[4])
   9.134 +        return __fls(pintr[4]) + (256-32*4);
   9.135 +    if (pintr[3])
   9.136 +        return __fls(pintr[3]) + (256-32*5);
   9.137 +    if (pintr[2])
   9.138 +        return __fls(pintr[2]) + (256-32*6);
   9.139 +    if (pintr[1])
   9.140 +        return __fls(pintr[1]) + (256-32*7);
   9.141 +    return __fls(pintr[0]);
   9.142 +}
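
The arithmetic maps each 32-bit word of the 256-bit pending bitmap back to a vector number: pintr[7] covers vectors 224-255 (256 - 32*1), pintr[6] covers 192-223, and so on down to pintr[0] for 0-31. A worked example:

    /* Example: if vector 0x31 (49) is pending, bit 17 of pintr[1] is set,
     * and find_highest_irq() returns __fls(1 << 17) + (256 - 32*7)
     *                              = 17 + 32 = 49. */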
   9.143 +
   9.144 +/*
    9.145 + * Return 0-255 for a pending irq.
    9.146 + *        -1 when none is pending.
   9.147 + */
   9.148 +static inline int find_highest_pending_irq(struct exec_domain *d)
   9.149 +{
   9.150 +    vcpu_iodata_t *vio;
   9.151 +
   9.152 +    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
   9.153 +    if (vio == 0) {
   9.154 +        VMX_DBG_LOG(DBG_LEVEL_1, 
   9.155 +                    "bad shared page: %lx\n", (unsigned long) vio);
   9.156 +        domain_crash();
   9.157 +    }
   9.158 +        
   9.159 +    return find_highest_irq(&vio->vp_intr[0]);
   9.160 +}
   9.161 +
   9.162 +static inline void clear_highest_bit(struct exec_domain *d, int vector)
   9.163 +{
   9.164 +    vcpu_iodata_t *vio;
   9.165 +
   9.166 +    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
   9.167 +    if (vio == 0) {
   9.168 +        VMX_DBG_LOG(DBG_LEVEL_1, 
   9.169 +                    "bad shared page: %lx\n", (unsigned long) vio);
   9.170 +        domain_crash();
   9.171 +    }
   9.172 +        
   9.173 +    clear_bit(vector, &vio->vp_intr[0]);
   9.174 +}
   9.175 +
   9.176 +static inline int irq_masked(unsigned long eflags)
   9.177 +{
   9.178 +    return ((eflags & X86_EFLAGS_IF) == 0);
   9.179 +}
   9.180 +
   9.181 +void vmx_intr_assist(struct exec_domain *d) 
   9.182 +{
   9.183 +    int highest_vector = find_highest_pending_irq(d);
   9.184 +    unsigned long intr_fields, eflags;
   9.185 +
   9.186 +    if (highest_vector == -1)
   9.187 +        return;
   9.188 +
   9.189 +    __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields);
   9.190 +    if (intr_fields & INTR_INFO_VALID_MASK) {
   9.191 +        VMX_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx\n", 
   9.192 +                    intr_fields);
   9.193 +        return;
   9.194 +    }
   9.195 +
   9.196 +    __vmread(GUEST_EFLAGS, &eflags);
   9.197 +    if (irq_masked(eflags)) {
    9.198 +        VMX_DBG_LOG(DBG_LEVEL_1, "guest pending: %x, eflags: %lx\n", 
   9.199 +                    highest_vector, eflags);
   9.200 +        return;
   9.201 +    }
   9.202 +        
   9.203 +    clear_highest_bit(d, highest_vector); 
   9.204 +    intr_fields = (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | highest_vector);
   9.205 +    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
   9.206 +
   9.207 +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
   9.208 +
   9.209 +    return;
   9.210 +}
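
Injection itself is just the write of the VM-entry interruption-information field above. A worked example, assuming the architectural encoding (valid bit 31, interruption type in bits 10:8, vector in bits 7:0):

    /* Delivering external-interrupt vector 0x30:
     *   INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | 0x30
     *     = 0x80000000       | 0x00000000         | 0x30 = 0x80000030  */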
   9.211 +
   9.212 +void vmx_do_resume(struct exec_domain *d) 
   9.213 +{
   9.214 +    extern long do_block();
   9.215 +
   9.216 +    __vmwrite(HOST_CR3, pagetable_val(d->mm.monitor_table));
   9.217 +    __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
   9.218 +    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());
   9.219 +
   9.220 +    if (event_pending(d)) {
   9.221 +        if (test_bit(IOPACKET_PORT, &d->domain->shared_info->evtchn_pending[0])) 
   9.222 +            vmx_io_assist(d);
   9.223 +
   9.224 +        else if (test_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags)) {
   9.225 +            printk("got an event while blocked on I/O\n");
   9.226 +            do_block();
   9.227 +        }
   9.228 +                
    9.229 +        /* Assumption: the device model will not inject an interrupt
    9.230 +         * while an ioreq_t is pending; i.e. the response and the
    9.231 +         * interrupt may arrive together, but an interrupt without
    9.232 +         * a response to the ioreq_t is not allowed.
   9.233 +         */
   9.234 +    }
   9.235 +    if (!test_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags))
   9.236 +        vmx_intr_assist(d);
   9.237 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen/arch/x86/vmx_vmcs.c	Wed Dec 15 23:09:11 2004 +0000
    10.3 @@ -0,0 +1,503 @@
    10.4 +/*
    10.5 + * vmx_vmcs.c: VMCS management
    10.6 + * Copyright (c) 2004, Intel Corporation.
    10.7 + *
    10.8 + * This program is free software; you can redistribute it and/or modify it
    10.9 + * under the terms and conditions of the GNU General Public License,
   10.10 + * version 2, as published by the Free Software Foundation.
   10.11 + *
   10.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   10.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   10.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   10.15 + * more details.
   10.16 + *
   10.17 + * You should have received a copy of the GNU General Public License along with
   10.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   10.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   10.20 + *
   10.21 + */
   10.22 +
   10.23 +#include <xen/config.h>
   10.24 +#include <xen/init.h>
   10.25 +#include <xen/mm.h>
   10.26 +#include <xen/lib.h>
   10.27 +#include <xen/errno.h>
   10.28 +
   10.29 +#include <asm/cpufeature.h>
   10.30 +#include <asm/processor.h>
   10.31 +#include <asm/msr.h>
   10.32 +#include <asm/vmx.h>
   10.33 +#include <xen/event.h>
   10.34 +#include <xen/kernel.h>
   10.35 +#include <public/io/ioreq.h>
   10.36 +#include <asm/domain_page.h>
   10.37 +
   10.38 +struct vmcs_struct *alloc_vmcs(void) 
   10.39 +{
   10.40 +    struct vmcs_struct *vmcs;
   10.41 +    unsigned int cpu_sig = cpuid_eax(0x00000001);
   10.42 +
    10.43 +    vmcs = (struct vmcs_struct *) alloc_xenheap_pages(get_order(vmcs_size)); 
    10.44 +    if (vmcs == NULL) return NULL;   /* allocation can fail */
    10.45 +    memset((char *) vmcs, 0, vmcs_size); /* don't remove: the region must start zeroed */
   10.46 +    vmcs->vmcs_revision_id = (cpu_sig > 0xf41)? 3 : 1;
   10.47 +    return vmcs;
   10.48 +} 
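
The revision id here is derived from the CPU signature; architecturally it is reported in the low bits of the IA32_VMX_BASIC MSR, which also gives the required region size. A sketch of that variant (MSR index 0x480 per the VT specification; not used by this changeset):

    /* Hypothetical: read the VMCS revision identifier from IA32_VMX_BASIC. */
    #define MSR_IA32_VMX_BASIC 0x480

    static inline u32 vmcs_revision_id(void)
    {
        u32 lo, hi;
        rdmsr(MSR_IA32_VMX_BASIC, lo, hi);
        return lo; /* bits 30:0 hold the revision id; bit 31 is zero */
    }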
   10.49 +
   10.50 +void free_vmcs(struct vmcs_struct *vmcs)
   10.51 +{
   10.52 +    int order;
   10.53 +
    10.54 +    order = get_order(vmcs_size); /* must match the allocation in alloc_vmcs() */
   10.55 +    free_xenheap_pages((unsigned long) vmcs, order);
   10.56 +}
   10.57 +
   10.58 +static inline int construct_vmcs_controls(void)
   10.59 +{
   10.60 +    int error = 0;
   10.61 +        
   10.62 +    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, 
   10.63 +                       MONITOR_PIN_BASED_EXEC_CONTROLS);
   10.64 +
   10.65 +    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, 
   10.66 +                       MONITOR_CPU_BASED_EXEC_CONTROLS);
   10.67 +
   10.68 +    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
   10.69 +    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
   10.70 +
   10.71 +    return error;
   10.72 +}
   10.73 +
   10.74 +#define GUEST_SEGMENT_LIMIT     0xffffffff      
   10.75 +#define HOST_SEGMENT_LIMIT      0xffffffff      
   10.76 +
   10.77 +struct host_execution_env {
   10.78 +    /* selectors */
   10.79 +    unsigned short ldtr_selector;
   10.80 +    unsigned short tr_selector;
   10.81 +    unsigned short ds_selector;
   10.82 +    unsigned short cs_selector;
   10.83 +    /* limits */
   10.84 +    unsigned short gdtr_limit;
   10.85 +    unsigned short ldtr_limit;
   10.86 +    unsigned short idtr_limit;
   10.87 +    unsigned short tr_limit;
   10.88 +    /* base */
   10.89 +    unsigned long gdtr_base;
   10.90 +    unsigned long ldtr_base;
   10.91 +    unsigned long idtr_base;
   10.92 +    unsigned long tr_base;
   10.93 +    unsigned long ds_base;
   10.94 +    unsigned long cs_base;
   10.95 +    /* control registers */
   10.96 +    unsigned long cr3;
   10.97 +    unsigned long cr0;
   10.98 +    unsigned long cr4;
   10.99 +    unsigned long dr7;
  10.100 +};
  10.101 +
   10.102 +#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */
  10.103 +
  10.104 +int vmx_setup_platform(struct exec_domain *d, execution_context_t *context)
  10.105 +{
  10.106 +    int i;
  10.107 +    unsigned int n;
  10.108 +    unsigned long *p, mpfn, offset, addr;
  10.109 +    struct e820entry *e820p;
  10.110 +    unsigned long gpfn = 0;
  10.111 +
  10.112 +    context->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */
  10.113 +
  10.114 +    n = context->ecx;
  10.115 +    if (n > 32) {
  10.116 +        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d\n", n);
  10.117 +        return -1;
  10.118 +    }
  10.119 +
  10.120 +    addr = context->edi;
  10.121 +    offset = (addr & ~PAGE_MASK);
  10.122 +    addr = round_pgdown(addr);
  10.123 +    mpfn = phys_to_machine_mapping[addr >> PAGE_SHIFT];
  10.124 +    p = map_domain_mem(mpfn << PAGE_SHIFT);
  10.125 +
  10.126 +    e820p = (struct e820entry *) ((unsigned long) p + offset); 
  10.127 +
  10.128 +    for (i = 0; i < n; i++) {
  10.129 +        if (e820p[i].type == E820_SHARED_PAGE) {
  10.130 +            gpfn = (e820p[i].addr >> PAGE_SHIFT);
  10.131 +            break;
  10.132 +        }
  10.133 +    }
  10.134 +
   10.135 +    unmap_domain_mem(p);
   10.136 +
   10.137 +    if (gpfn == 0) {
   10.138 +        VMX_DBG_LOG(DBG_LEVEL_1, "No shared page found\n");
   10.139 +        return -1;
   10.140 +    }
  10.141 +    mpfn = phys_to_machine_mapping[gpfn];
  10.142 +    p = map_domain_mem(mpfn << PAGE_SHIFT);
  10.143 +    d->thread.arch_vmx.vmx_platform.shared_page_va = (unsigned long) p;
  10.144 +
  10.145 +    return 0;
  10.146 +}
  10.147 +
  10.148 +
  10.149 +/*
  10.150 + * Add <guest pfn, machine pfn> mapping to per-domain mapping. Full
  10.151 + * virtualization does not need per-domain mapping.
  10.152 + */
  10.153 +static int add_mapping_perdomain(struct exec_domain *d, unsigned long gpfn, 
  10.154 +                                 unsigned long mpfn)
  10.155 +{
  10.156 +    struct pfn_info *page;
  10.157 +    unsigned long pfn = 0;
  10.158 +
  10.159 +    /*
  10.160 +     * We support up to 4GB memory for a guest at this point
  10.161 +     */
  10.162 +    if (gpfn > ENTRIES_PER_L2_PAGETABLE * ENTRIES_PER_L1_PAGETABLE)
  10.163 +        return -1;
  10.164 +
  10.165 +    if (!(l1_pgentry_val(d->domain->mm_perdomain_pt[
  10.166 +            gpfn >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]) & _PAGE_PRESENT))
  10.167 +    {
  10.168 +        page = (struct pfn_info *) alloc_domheap_page(NULL);
  10.169 +        if (!page) {
  10.170 +            return -1;
  10.171 +        }
  10.172 +
  10.173 +        pfn = (unsigned long) (page - frame_table);
  10.174 +        d->domain->mm_perdomain_pt[gpfn >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)] = 
  10.175 +            mk_l1_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
  10.176 +    }
  10.177 +    phys_to_machine_mapping[gpfn] = mpfn;
  10.178 +
  10.179 +    return 0;
  10.180 +}
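
The index arithmetic: gpfn >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT) is gpfn >> 10 on x86-32, so each per-domain L1 page covers 1024 gpfns, i.e. 4MB of guest-physical space, and the 4GB ceiling above corresponds to 1024 such slots. A worked example:

    /* Example: gpfn 0x12345 selects per-domain slot 0x12345 >> 10 = 0x48,
     * and phys_to_machine_mapping[0x12345] then records its machine frame. */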
  10.181 +
  10.182 +void vmx_do_launch(struct exec_domain *ed) 
  10.183 +{
  10.184 +/* Update CR3, GDT, LDT, TR */
  10.185 +    unsigned int tr, cpu, error = 0;
  10.186 +    struct host_execution_env host_env;
  10.187 +    struct Xgt_desc_struct desc;
  10.188 +    struct list_head *list_ent;
  10.189 +    l2_pgentry_t *mpl2e, *guest_pl2e_cache;
  10.190 +    unsigned long i, pfn = 0;
  10.191 +    struct pfn_info *page;
  10.192 +    execution_context_t *ec = get_execution_context();
  10.193 +    struct domain *d = ed->domain;
  10.194 +
  10.195 +    cpu =  smp_processor_id();
  10.196 +    ed->mm.min_pfn = ed->mm.max_pfn = 0;
  10.197 +
  10.198 +    spin_lock(&d->page_alloc_lock);
  10.199 +    list_ent = d->page_list.next;
  10.200 +
  10.201 +    mpl2e = (l2_pgentry_t *) map_domain_mem(pagetable_val(ed->mm.monitor_table));
  10.202 +    ASSERT(mpl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]);
  10.203 +
  10.204 +    for (i = 0; list_ent != &d->page_list; i++ ) {
  10.205 +        pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
  10.206 +        ed->mm.min_pfn = min(ed->mm.min_pfn, pfn);
  10.207 +        ed->mm.max_pfn = max(ed->mm.max_pfn, pfn);
  10.208 +        list_ent = frame_table[pfn].list.next;
  10.209 +        add_mapping_perdomain(ed, i, pfn);
  10.210 +    }
  10.211 +
  10.212 +    spin_unlock(&d->page_alloc_lock);
  10.213 +
  10.214 +    page = (struct pfn_info *) alloc_domheap_page(NULL);
  10.215 +    pfn = (unsigned long) (page - frame_table);
  10.216 +
  10.217 +    /*
  10.218 +     * make linear_pt_table work for guest ptes
  10.219 +     */
  10.220 +    mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  10.221 +        mk_l2_pgentry((pfn << PAGE_SHIFT)| __PAGE_HYPERVISOR);
  10.222 +
  10.223 +    guest_pl2e_cache = map_domain_mem(pfn << PAGE_SHIFT);
  10.224 +    memset(guest_pl2e_cache, 0, PAGE_SIZE); /* clean it up */
  10.225 +    ed->mm.guest_pl2e_cache = guest_pl2e_cache; 
  10.226 +        
  10.227 +    unmap_domain_mem(mpl2e);
  10.228 +
  10.229 +    vmx_setup_platform(ed, ec);
  10.230 +
  10.231 +    __asm__ __volatile__ ("sgdt  (%%eax) \n" :: "a"(&desc) : "memory");
  10.232 +    host_env.gdtr_limit = desc.size;
  10.233 +    host_env.gdtr_base = desc.address;
  10.234 +
  10.235 +    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
  10.236 +
  10.237 +    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
  10.238 +    error |= __vmwrite(GUEST_LDTR_BASE, 0);
  10.239 +    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
  10.240 +        
  10.241 +    __asm__ __volatile__ ("str  (%%eax) \n" :: "a"(&tr) : "memory");
  10.242 +    host_env.tr_selector = tr;
  10.243 +    host_env.tr_limit = sizeof(struct tss_struct);
  10.244 +    host_env.tr_base = (unsigned long) &init_tss[cpu];
  10.245 +
  10.246 +    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
  10.247 +    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
  10.248 +    error |= __vmwrite(GUEST_TR_BASE, 0);
  10.249 +    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
  10.250 +
  10.251 +    ed->mm.shadow_table = ed->mm.pagetable;
  10.252 +    __vmwrite(GUEST_CR3, pagetable_val(ed->mm.pagetable));
  10.253 +    __vmwrite(HOST_CR3, pagetable_val(ed->mm.monitor_table));
  10.254 +    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());
  10.255 +
  10.256 +    ed->thread.schedule_tail = arch_vmx_do_resume;
  10.257 +}
  10.258 +
  10.259 +/*
   10.260 + * Initially set up the same environment as the host.
  10.261 + */
  10.262 +static inline int 
  10.263 +construct_init_vmcs_guest(execution_context_t *context, 
  10.264 +                          full_execution_context_t *full_context,
  10.265 +                          struct host_execution_env *host_env)
  10.266 +{
  10.267 +    int error = 0;
  10.268 +    union vmcs_arbytes arbytes;
  10.269 +    unsigned long dr7;
  10.270 +    unsigned long eflags, shadow_cr;
  10.271 +
  10.272 +    /* MSR */
  10.273 +    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
  10.274 +    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
  10.275 +
  10.276 +    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
  10.277 +    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
  10.278 +    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
  10.279 +    /* interrupt */
  10.280 +    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
  10.281 +    /* mask */
  10.282 +    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
  10.283 +    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);
  10.284 +
  10.285 +    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
  10.286 +    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
  10.287 +
  10.288 +    /* TSC */
  10.289 +    error |= __vmwrite(TSC_OFFSET, 0);
  10.290 +    error |= __vmwrite(CR3_TARGET_COUNT, 0);
  10.291 +
  10.292 +    /* Guest Selectors */
  10.293 +    error |= __vmwrite(GUEST_CS_SELECTOR, context->cs);
  10.294 +    error |= __vmwrite(GUEST_ES_SELECTOR, context->es);
  10.295 +    error |= __vmwrite(GUEST_SS_SELECTOR, context->ss);
  10.296 +    error |= __vmwrite(GUEST_DS_SELECTOR, context->ds);
  10.297 +    error |= __vmwrite(GUEST_FS_SELECTOR, context->fs);
  10.298 +    error |= __vmwrite(GUEST_GS_SELECTOR, context->gs);
  10.299 +
  10.300 +    /* Guest segment Limits */
  10.301 +    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
  10.302 +    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
  10.303 +    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
  10.304 +    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
  10.305 +    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
  10.306 +    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
  10.307 +
  10.308 +    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);
  10.309 +
  10.310 +    /* AR bytes */
  10.311 +    arbytes.bytes = 0;
  10.312 +    arbytes.fields.seg_type = 0x3;          /* type = 3 */
  10.313 +    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
   10.314 +    arbytes.fields.dpl = 0;                 /* DPL = 0 */
  10.315 +    arbytes.fields.p = 1;                   /* segment present */
  10.316 +    arbytes.fields.default_ops_size = 1;    /* 32-bit */
  10.317 +    arbytes.fields.g = 1;   
  10.318 +    arbytes.fields.null_bit = 0;            /* not null */
  10.319 +
  10.320 +    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
  10.321 +    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
  10.322 +    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
  10.323 +    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
  10.324 +    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
  10.325 +
  10.326 +    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
  10.327 +    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
  10.328 +
  10.329 +    error |= __vmwrite(GUEST_GDTR_BASE, context->edx);
  10.330 +    context->edx = 0;
  10.331 +    error |= __vmwrite(GUEST_GDTR_LIMIT, context->eax);
  10.332 +    context->eax = 0;
  10.333 +
   10.334 +    arbytes.fields.s = 0;                   /* not a code or data segment */
   10.335 +    arbytes.fields.seg_type = 0x2;          /* LDT */
  10.336 +    arbytes.fields.default_ops_size = 0;    /* 16-bit */
  10.337 +    arbytes.fields.g = 0;   
  10.338 +    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
  10.339 +
  10.340 +    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
  10.341 +    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
  10.342 +
  10.343 +    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */
  10.344 +
   10.345 +    /* Initially PG and PE are not set */
  10.346 +    shadow_cr = host_env->cr0;
  10.347 +    shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG);
  10.348 +    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
  10.349 +    /* CR3 is set in vmx_final_setup_guestos */
  10.350 +    error |= __vmwrite(GUEST_CR4, host_env->cr4);
  10.351 +    shadow_cr = host_env->cr4;
  10.352 +    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
  10.353 +    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
  10.354 +
  10.355 +    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
  10.356 +    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
  10.357 +    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
  10.358 +    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
  10.359 +    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
  10.360 +    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
  10.361 +    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);
  10.362 +
  10.363 +    error |= __vmwrite(GUEST_ESP, context->esp);
  10.364 +    error |= __vmwrite(GUEST_EIP, context->eip);
  10.365 +
  10.366 +    eflags = context->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
  10.367 +    eflags |= VMCS_EFLAGS_RESERVED_1; /* set 1s */
  10.368 +
  10.369 +    error |= __vmwrite(GUEST_EFLAGS, eflags);
  10.370 +
  10.371 +    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
  10.372 +    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
  10.373 +    error |= __vmwrite(GUEST_DR7, dr7);
  10.374 +    error |= __vmwrite(GUEST_VMCS0, 0xffffffff);
  10.375 +    error |= __vmwrite(GUEST_VMCS1, 0xffffffff);
  10.376 +
  10.377 +    return error;
  10.378 +}
  10.379 +
  10.380 +static inline int construct_vmcs_host(struct host_execution_env *host_env)
  10.381 +{
  10.382 +    int error = 0;
  10.383 +    unsigned long crn;
  10.384 +    struct Xgt_desc_struct desc;
  10.385 +
  10.386 +    /* Host Selectors */
  10.387 +    host_env->ds_selector = __HYPERVISOR_DS;
  10.388 +    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
  10.389 +    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
  10.390 +    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
  10.391 +    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
  10.392 +    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
  10.393 +
  10.394 +    host_env->cs_selector = __HYPERVISOR_CS;
  10.395 +    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);
  10.396 +
  10.397 +    host_env->ds_base = 0;
  10.398 +    host_env->cs_base = 0;
  10.399 +    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
  10.400 +    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);
  10.401 +
  10.402 +/* Debug */
  10.403 +    __asm__ __volatile__ ("sidt  (%%eax) \n" :: "a"(&desc) : "memory");
  10.404 +    host_env->idtr_limit = desc.size;
  10.405 +    host_env->idtr_base = desc.address;
  10.406 +    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);
  10.407 +
  10.408 +    __asm__ __volatile__ ("movl %%cr0,%0" : "=r" (crn) : );
  10.409 +    host_env->cr0 = crn;
  10.410 +    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
  10.411 +
  10.412 +    /* CR3 is set in vmx_final_setup_hostos */
  10.413 +    __asm__ __volatile__ ("movl %%cr4,%0" : "=r" (crn) : ); 
  10.414 +    host_env->cr4 = crn;
  10.415 +    error |= __vmwrite(HOST_CR4, crn);
  10.416 +    error |= __vmwrite(HOST_EIP, (unsigned long) vmx_asm_vmexit_handler);
  10.417 +
  10.418 +    return error;
  10.419 +}
  10.420 +
  10.421 +/*
   10.422 + * Needs extending to support full virtualization.
   10.423 + * The use_host_env argument indicates whether the new VMCS should use
   10.424 + * the same setup as the host (xenolinux).
  10.425 + */
  10.426 +
  10.427 +int construct_vmcs(struct arch_vmx_struct *arch_vmx,
  10.428 +                   execution_context_t *context,
  10.429 +                   full_execution_context_t *full_context,
  10.430 +                   int use_host_env)
  10.431 +{
  10.432 +    int error;
  10.433 +    u64 vmcs_phys_ptr;
  10.434 +
  10.435 +    struct host_execution_env host_env;
  10.436 +
  10.437 +    if (use_host_env != VMCS_USE_HOST_ENV)
  10.438 +        return -EINVAL;
  10.439 +
  10.440 +    memset(&host_env, 0, sizeof(struct host_execution_env));
  10.441 +
  10.442 +    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
  10.443 +
  10.444 +    if ((error = __vmpclear (vmcs_phys_ptr))) {
  10.445 +        printk("construct_vmcs: VMCLEAR failed\n");
  10.446 +        return -EINVAL;         
  10.447 +    }
  10.448 +    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
  10.449 +        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
  10.450 +               (unsigned long) vmcs_phys_ptr);
  10.451 +        return -EINVAL; 
  10.452 +    }
  10.453 +    if ((error = construct_vmcs_controls())) {
  10.454 +        printk("construct_vmcs: construct_vmcs_controls failed\n");
  10.455 +        return -EINVAL;         
  10.456 +    }
  10.457 +    /* host selectors */
  10.458 +    if ((error = construct_vmcs_host(&host_env))) {
  10.459 +        printk("construct_vmcs: construct_vmcs_host failed\n");
  10.460 +        return -EINVAL;         
  10.461 +    }
  10.462 +    /* guest selectors */
  10.463 +    if ((error = construct_init_vmcs_guest(context, full_context, &host_env))) {
  10.464 +        printk("construct_vmcs: construct_vmcs_guest failed\n");
  10.465 +        return -EINVAL;         
  10.466 +    }       
  10.467 +
   10.468 +    if ((error = __vmwrite(EXCEPTION_BITMAP, 
  10.469 +                            MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
  10.470 +        printk("construct_vmcs: setting Exception bitmap failed\n");
  10.471 +        return -EINVAL;         
  10.472 +    }
  10.473 +
  10.474 +    return 0;
  10.475 +}
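
Putting the helpers together, the VMCS lifecycle in this file is: allocate, VMCLEAR, VMPTRLD, then fill in controls, host state and guest state. A condensed usage sketch (hypothetical wrapper; error handling abbreviated):

    /* Sketch: create and populate a VMCS for a new VMX domain. */
    int vmx_create_vmcs(struct arch_vmx_struct *arch_vmx,
                        execution_context_t *ec,
                        full_execution_context_t *fec)
    {
        arch_vmx->vmcs = alloc_vmcs();
        if (arch_vmx->vmcs == NULL)
            return -ENOMEM;
        return construct_vmcs(arch_vmx, ec, fec, VMCS_USE_HOST_ENV);
    }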
  10.476 +
  10.477 +int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) 
  10.478 +{
  10.479 +    int error;
  10.480 +
  10.481 +    if ((error = __vmptrld(phys_ptr))) {
  10.482 +        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); 
  10.483 +        return error;
  10.484 +    }
  10.485 +    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); 
  10.486 +    return 0;
  10.487 +}
  10.488 +
  10.489 +int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) 
  10.490 +{
  10.491 +    /* take the current VMCS */
  10.492 +    __vmptrst(phys_ptr);
  10.493 +    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); 
  10.494 +    return 0;
  10.495 +}
  10.496 +
  10.497 +void vm_launch_fail(unsigned long eflags)
  10.498 +{
  10.499 +    BUG();
  10.500 +}
  10.501 +
  10.502 +void vm_resume_fail(unsigned long eflags)
  10.503 +{
  10.504 +    BUG();
  10.505 +}
  10.506 +
    11.1 --- a/xen/arch/x86/x86_32/entry.S	Wed Dec 15 18:19:36 2004 +0000
    11.2 +++ b/xen/arch/x86/x86_32/entry.S	Wed Dec 15 23:09:11 2004 +0000
    11.3 @@ -65,6 +65,104 @@
    11.4          andl $~3,reg;      \
    11.5          movl (reg),reg;
    11.6  
    11.7 +#ifdef CONFIG_VMX
    11.8 +/*
    11.9 + * At VMExit time the processor saves the guest selectors, esp, eip, 
   11.10 + * and eflags. Therefore we don't save them, but simply decrement 
   11.11 + * the kernel stack pointer to make it consistent with the stack frame 
    11.12 + * at the usual interruption time. The host eflags is not saved by VMX,
    11.13 + * so we set it to a fixed value.
    11.14 + *
    11.15 + * We also need the room, especially because the orig_eax field is used
    11.16 + * by do_IRQ(). Compared with struct xen_regs, we skip pushing the following:
   11.17 + * (1/1)  u16 error_code;
   11.18 + * (2/1)  u16 entry_vector;
   11.19 + *   (2)  u32 eip;
   11.20 + *   (3)  u32 cs;
   11.21 + *   (4)  u32 eflags;
   11.22 + */
   11.23 +#define VMX_MONITOR_EFLAGS	0x202 /* IF on */
   11.24 +#define NR_SKIPPED_REGS	4	/* See the above explanation */
   11.25 +#define VMX_SAVE_ALL_NOSEGREGS \
   11.26 +        pushl $VMX_MONITOR_EFLAGS; \
   11.27 +        popf; \
   11.28 +        subl $(NR_SKIPPED_REGS*4), %esp; \
   11.29 +        pushl %eax; \
   11.30 +        pushl %ebp; \
   11.31 +        pushl %edi; \
   11.32 +        pushl %esi; \
   11.33 +        pushl %edx; \
   11.34 +        pushl %ecx; \
   11.35 +        pushl %ebx;
   11.36 +
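In C terms, the frame that VMX_SAVE_ALL_NOSEGREGS leaves on the stack is meant to line up with the start of struct xen_regs (a sketch; field names taken from the comment above):

    /* Sketch of the resulting stack layout, lowest address first. */
    struct vmx_exit_frame {
        unsigned long ebx, ecx, edx, esi, edi, ebp, eax; /* pushed by the macro */
        unsigned long skipped[NR_SKIPPED_REGS];          /* error_code/entry_vector,
                                                            eip, cs, eflags */
    };
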
   11.37 +ENTRY(vmx_asm_vmexit_handler)
   11.38 +        /* selectors are restored/saved by VMX */
   11.39 +        VMX_SAVE_ALL_NOSEGREGS
   11.40 +        call SYMBOL_NAME(vmx_vmexit_handler)
   11.41 +        jmp vmx_asm_do_resume
   11.42 +
   11.43 +ENTRY(vmx_asm_do_launch)
   11.44 +        popl %ebx
   11.45 +        popl %ecx
   11.46 +        popl %edx
   11.47 +        popl %esi
   11.48 +        popl %edi
   11.49 +        popl %ebp
   11.50 +        popl %eax
   11.51 +        addl $(NR_SKIPPED_REGS*4), %esp
    11.52 +        /* VMLAUNCH */
   11.53 +        .byte 0x0f,0x01,0xc2
   11.54 +        pushf
   11.55 +        call SYMBOL_NAME(vm_launch_fail)
   11.56 +        hlt
   11.57 +        
   11.58 +        ALIGN
   11.59 +        
   11.60 +ENTRY(vmx_asm_do_resume)
   11.61 +vmx_test_all_events:
   11.62 +        GET_CURRENT(%ebx)
   11.63 +/* test_all_events: */
   11.64 +        xorl %ecx,%ecx
   11.65 +        notl %ecx
   11.66 +        cli                             # tests must not race interrupts
   11.67 +/*test_softirqs:*/  
   11.68 +        movl EDOMAIN_processor(%ebx),%eax
   11.69 +        shl  $6,%eax                    # sizeof(irq_cpustat) == 64
   11.70 +        test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
   11.71 +        jnz  vmx_process_softirqs
   11.72 +
   11.73 +vmx_restore_all_guest:
   11.74 +        call SYMBOL_NAME(load_cr2)
   11.75 +        /* 
   11.76 +         * Check if we are going back to VMX-based VM
   11.77 +         * By this time, all the setups in the VMCS must be complete.
   11.78 +         */
   11.79 +        popl %ebx
   11.80 +        popl %ecx
   11.81 +        popl %edx
   11.82 +        popl %esi
   11.83 +        popl %edi
   11.84 +        popl %ebp
   11.85 +        popl %eax
   11.86 +        addl $(NR_SKIPPED_REGS*4), %esp
   11.87 +        /* VMRESUME */
   11.88 +        .byte 0x0f,0x01,0xc3
   11.89 +        pushf
   11.90 +        call SYMBOL_NAME(vm_resume_fail)
   11.91 +        /* Should never reach here */
   11.92 +        hlt
   11.93 +
   11.94 +        ALIGN
   11.95 +vmx_process_softirqs:
   11.96 +        sti       
   11.97 +        call SYMBOL_NAME(do_softirq)
   11.98 +        jmp  vmx_test_all_events
   11.99 +#endif
  11.100 +        
  11.101 +ENTRY(continue_nonidle_task)
  11.102 +        GET_CURRENT(%ebx)
  11.103 +        jmp test_all_events
  11.104 +
  11.105          ALIGN
  11.106  restore_all_guest:
  11.107          testb $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
    12.1 --- a/xen/common/event_channel.c	Wed Dec 15 18:19:36 2004 +0000
    12.2 +++ b/xen/common/event_channel.c	Wed Dec 15 23:09:11 2004 +0000
    12.3 @@ -474,7 +474,7 @@ static long evtchn_close(evtchn_close_t 
    12.4  }
    12.5  
    12.6  
    12.7 -static long evtchn_send(int lport)
    12.8 +long evtchn_send(int lport)
    12.9  {
   12.10      struct domain *ld = current->domain;
   12.11      struct exec_domain *rd;
    13.1 --- a/xen/common/kernel.c	Wed Dec 15 18:19:36 2004 +0000
    13.2 +++ b/xen/common/kernel.c	Wed Dec 15 23:09:11 2004 +0000
    13.3 @@ -33,13 +33,13 @@ xmem_cache_t *exec_domain_struct_cachep;
    13.4  struct domain *dom0;
    13.5  
    13.6  vm_assist_info_t vm_assist_info[MAX_VMASST_TYPE + 1];
    13.7 -
    13.8 +#if 0
    13.9  struct e820entry {
   13.10      unsigned long addr_lo, addr_hi;        /* start of memory segment */
   13.11      unsigned long size_lo, size_hi;        /* size of memory segment */
   13.12      unsigned long type;                    /* type of memory segment */
   13.13  };
   13.14 -
   13.15 +#endif
   13.16  void start_of_day(void);
   13.17  
   13.18  /* opt_console: comma-separated list of console outputs. */
    14.1 --- a/xen/common/softirq.c	Wed Dec 15 18:19:36 2004 +0000
    14.2 +++ b/xen/common/softirq.c	Wed Dec 15 23:09:11 2004 +0000
    14.3 @@ -13,6 +13,7 @@
    14.4  #include <xen/init.h>
    14.5  #include <xen/mm.h>
    14.6  #include <xen/sched.h>
    14.7 +#include <asm/vmx_vmcs.h>
    14.8  #include <xen/softirq.h>
    14.9  
   14.10  irq_cpustat_t irq_stat[NR_CPUS];
    15.1 --- a/xen/include/asm-x86/config.h	Wed Dec 15 18:19:36 2004 +0000
    15.2 +++ b/xen/include/asm-x86/config.h	Wed Dec 15 23:09:11 2004 +0000
    15.3 @@ -7,6 +7,7 @@
    15.4  #ifndef __XEN_I386_CONFIG_H__
    15.5  #define __XEN_I386_CONFIG_H__
    15.6  
    15.7 +#define CONFIG_VMX 1
    15.8  #define CONFIG_X86 1
    15.9  
   15.10  #define CONFIG_SMP 1
    16.1 --- a/xen/include/asm-x86/cpufeature.h	Wed Dec 15 18:19:36 2004 +0000
    16.2 +++ b/xen/include/asm-x86/cpufeature.h	Wed Dec 15 23:09:11 2004 +0000
    16.3 @@ -71,6 +71,8 @@
    16.4  #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
    16.5  
    16.6  /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
    16.7 +#define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
    16.8 +#define X86_FEATURE_VMXE	(4*32+ 5) /* Virtual Machine Extensions */
    16.9  #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
   16.10  
   16.11  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xen/include/asm-x86/e820.h	Wed Dec 15 23:09:11 2004 +0000
    17.3 @@ -0,0 +1,42 @@
    17.4 +/*
    17.5 + * structures and definitions for the int 15, ax=e820 memory map
    17.6 + * scheme.
    17.7 + *
    17.8 + * In a nutshell, arch/i386/boot/setup.S populates a scratch table
    17.9 + * in the empty_zero_block that contains a list of usable address/size
    17.10 + * pairs.  In arch/i386/kernel/setup.c, this information is
   17.11 + * transferred into the e820map, and in arch/i386/mm/init.c, that
   17.12 + * new information is used to mark pages reserved or not.
   17.13 + *
   17.14 + */
   17.15 +#ifndef __E820_HEADER
   17.16 +#define __E820_HEADER
   17.17 +
   17.18 +#define E820MAP	0x2d0		/* our map */
   17.19 +#define E820MAX	32		/* number of entries in E820MAP */
   17.20 +#define E820NR	0x1e8		/* # entries in E820MAP */
   17.21 +
   17.22 +#define E820_RAM	1
   17.23 +#define E820_RESERVED	2
   17.24 +#define E820_ACPI	3 /* usable as RAM once ACPI tables have been read */
   17.25 +#define E820_NVS	4
   17.26 +#define E820_IO			16
   17.27 +#define E820_SHARED_PAGE	17
   17.28 +
   17.29 +#define HIGH_MEMORY	(1024*1024)
   17.30 +
   17.31 +#ifndef __ASSEMBLY__
   17.32 +
   17.33 +struct e820map {
   17.34 +    int nr_map;
   17.35 +    struct e820entry {
   17.36 +        unsigned long long addr;	/* start of memory segment */
   17.37 +        unsigned long long size;	/* size of memory segment */
   17.38 +        unsigned long type;		/* type of memory segment */
   17.39 +    } map[E820MAX];
   17.40 +};
   17.41 +
   17.42 +extern struct e820map e820;
   17.43 +#endif/*!__ASSEMBLY__*/
   17.44 +
   17.45 +#endif/*__E820_HEADER*/
    18.1 --- a/xen/include/asm-x86/mm.h	Wed Dec 15 18:19:36 2004 +0000
    18.2 +++ b/xen/include/asm-x86/mm.h	Wed Dec 15 23:09:11 2004 +0000
    18.3 @@ -215,10 +215,19 @@ void synchronise_pagetables(unsigned lon
    18.4   * contiguous (or near contiguous) physical memory.
    18.5   */
    18.6  #undef  machine_to_phys_mapping
    18.7 +/*
    18.8 + * The phys_to_machine_mapping is the reversed mapping of MPT for full
    18.9 + * virtualization.
   18.10 + */
   18.11 +#undef  phys_to_machine_mapping
   18.12 +
   18.13  #ifdef __x86_64__
   18.14  extern unsigned long *machine_to_phys_mapping;
   18.15  #else
   18.16  #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
   18.17 +#ifdef CONFIG_VMX
   18.18 +#define phys_to_machine_mapping ((unsigned long *)PERDOMAIN_VIRT_START)
   18.19 +#endif
   18.20  #endif
   18.21  
   18.22  #define DEFAULT_GDT_ENTRIES     (LAST_RESERVED_GDT_ENTRY+1)
    19.1 --- a/xen/include/asm-x86/msr.h	Wed Dec 15 18:19:36 2004 +0000
    19.2 +++ b/xen/include/asm-x86/msr.h	Wed Dec 15 23:09:11 2004 +0000
    19.3 @@ -84,6 +84,10 @@
    19.4  #define MSR_MTRRcap		0x0fe
    19.5  #define MSR_IA32_BBL_CR_CTL        0x119
    19.6  
    19.7 +#define MSR_IA32_SYSENTER_CS		0x174
    19.8 +#define MSR_IA32_SYSENTER_ESP		0x175
    19.9 +#define MSR_IA32_SYSENTER_EIP		0x176
   19.10 +
   19.11  #define MSR_IA32_MCG_CAP       0x179
   19.12  #define MSR_IA32_MCG_STATUS        0x17a
   19.13  #define MSR_IA32_MCG_CTL       0x17b
    20.1 --- a/xen/include/asm-x86/processor.h	Wed Dec 15 18:19:36 2004 +0000
    20.2 +++ b/xen/include/asm-x86/processor.h	Wed Dec 15 23:09:11 2004 +0000
    20.3 @@ -16,6 +16,7 @@
    20.4  #include <asm/pdb.h>
    20.5  #include <xen/config.h>
    20.6  #include <xen/spinlock.h>
    20.7 +#include <asm/vmx_vmcs.h>
    20.8  #include <public/xen.h>
    20.9  #endif
   20.10  
   20.11 @@ -84,6 +85,7 @@
   20.12  #define X86_CR4_PCE		0x0100	/* enable performance counters at ipl 3 */
   20.13  #define X86_CR4_OSFXSR		0x0200	/* enable fast FPU save and restore */
   20.14  #define X86_CR4_OSXMMEXCPT	0x0400	/* enable unmasked SSE exceptions */
   20.15 +#define X86_CR4_VMXE		0x2000  /* enable VMX */
   20.16  
   20.17  /*
   20.18   * Trap/fault mnemonics.
   20.19 @@ -429,6 +431,9 @@ struct thread_struct {
   20.20      struct desc_struct fast_trap_desc;
   20.21  #endif
   20.22      trap_info_t        traps[256];
   20.23 +#ifdef CONFIG_VMX
   20.24 +    struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
   20.25 +#endif
   20.26  };
   20.27  
   20.28  #define IDT_ENTRIES 256
   20.29 @@ -473,6 +478,18 @@ struct mm_struct {
   20.30      l1_pgentry_t *perdomain_ptes;
   20.31      pagetable_t  pagetable;
   20.32  
   20.33 +#ifdef CONFIG_VMX
   20.34 +
   20.35 +#define SHM_full_32     (8) /* full virtualization for 32-bit */
   20.36 +
    20.37 +    pagetable_t  monitor_table;
    20.38 +    l2_pgentry_t *vpagetable;       /* virtual address of pagetable */
    20.39 +    l2_pgentry_t *shadow_vtable;    /* virtual address of shadow_table */
    20.40 +    l2_pgentry_t *guest_pl2e_cache; /* guest page directory cache */
    20.41 +    unsigned long min_pfn;          /* min host physical */
    20.42 +    unsigned long max_pfn;          /* max host physical */
   20.43 +#endif
   20.44 +
   20.45      /* shadow mode status and controls */
   20.46      unsigned int shadow_mode;  /* flags to control shadow table operation */
   20.47      pagetable_t  shadow_table;
   20.48 @@ -502,14 +519,25 @@ struct mm_struct {
   20.49      char gdt[10]; /* NB. 10 bytes needed for x86_64. Use 6 bytes for x86_32. */
   20.50  };
   20.51  
   20.52 +#define SHM_full_32     (8) /* full virtualization for 32-bit */
   20.53 +
   20.54  static inline void write_ptbase(struct mm_struct *mm)
   20.55  {
   20.56      unsigned long pa;
   20.57  
   20.58 +#ifdef CONFIG_VMX
   20.59 +    if ( unlikely(mm->shadow_mode) ) {
   20.60 +            if (mm->shadow_mode == SHM_full_32)
   20.61 +                    pa = pagetable_val(mm->monitor_table);
   20.62 +            else
   20.63 +                    pa = pagetable_val(mm->shadow_table);   
   20.64 +    }
   20.65 +#else
   20.66      if ( unlikely(mm->shadow_mode) )
   20.67 -        pa = pagetable_val(mm->shadow_table);
   20.68 +            pa = pagetable_val(mm->shadow_table);    
   20.69 +#endif
   20.70      else
   20.71 -        pa = pagetable_val(mm->pagetable);
   20.72 +            pa = pagetable_val(mm->pagetable);
   20.73  
   20.74      write_cr3(pa);
   20.75  }
   20.76 @@ -533,18 +561,40 @@ long set_gdt(struct exec_domain *d,
   20.77  
   20.78  long set_debugreg(struct exec_domain *p, int reg, unsigned long value);
   20.79  
   20.80 -struct microcode {
   20.81 -    unsigned int hdrver;
   20.82 -    unsigned int rev;
   20.83 -    unsigned int date;
   20.84 -    unsigned int sig;
   20.85 -    unsigned int cksum;
   20.86 -    unsigned int ldrver;
   20.87 -    unsigned int pf;
   20.88 -    unsigned int reserved[5];
   20.89 -    unsigned int bits[500];
   20.90 +struct microcode_header {
   20.91 +        unsigned int hdrver;
   20.92 +        unsigned int rev;
   20.93 +        unsigned int date;
   20.94 +        unsigned int sig;
   20.95 +        unsigned int cksum;
   20.96 +        unsigned int ldrver;
   20.97 +        unsigned int pf;
   20.98 +        unsigned int datasize;
   20.99 +        unsigned int totalsize;
  20.100 +        unsigned int reserved[3];
  20.101  };
  20.102  
  20.103 +struct microcode {
  20.104 +        struct microcode_header hdr;
  20.105 +        unsigned int bits[0];
  20.106 +};
  20.107 +
  20.108 +typedef struct microcode microcode_t;
  20.109 +typedef struct microcode_header microcode_header_t;
  20.110 +
   20.111 +/* the microcode format is extended from Prescott processors onwards */
  20.112 +struct extended_signature {
  20.113 +        unsigned int sig;
  20.114 +        unsigned int pf;
  20.115 +        unsigned int cksum;
  20.116 +};
  20.117 +
  20.118 +struct extended_sigtable {
  20.119 +        unsigned int count;
  20.120 +        unsigned int cksum;
  20.121 +        unsigned int reserved[3];
  20.122 +        struct extended_signature sigs[0];
  20.123 +};
  20.124  /* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
  20.125  #define MICROCODE_IOCFREE	_IO('6',0)
  20.126  
    21.1 --- a/xen/include/asm-x86/shadow.h	Wed Dec 15 18:19:36 2004 +0000
    21.2 +++ b/xen/include/asm-x86/shadow.h	Wed Dec 15 23:09:11 2004 +0000
    21.3 @@ -17,6 +17,7 @@
    21.4  #define SHM_logdirty    (2) /* log pages that are dirtied */
    21.5  #define SHM_translate   (3) /* lookup machine pages in translation table */
    21.6  #define SHM_cow         (4) /* copy on write all dirtied pages */
    21.7 +#define SHM_full_32     (8) /* full virtualization for 32-bit */
    21.8  
    21.9  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
   21.10  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
   21.11 @@ -37,6 +38,23 @@ extern void shadow_l2_normal_pt_update(u
   21.12  extern void unshadow_table(unsigned long gpfn, unsigned int type);
   21.13  extern int shadow_mode_enable(struct domain *p, unsigned int mode);
   21.14  
   21.15 +#ifdef CONFIG_VMX
   21.16 +extern void vmx_shadow_clear_state(struct mm_struct *);
   21.17 +extern void vmx_shadow_invlpg(struct mm_struct *, unsigned long);
   21.18 +#endif
   21.19 +
   21.20 +#define  __get_machine_to_phys(m, guest_gpfn, gpfn)     \
   21.21 +    if ((m)->shadow_mode == SHM_full_32)                \
   21.22 +        (guest_gpfn) = machine_to_phys_mapping[(gpfn)]; \
   21.23 +    else                                                \
   21.24 +        (guest_gpfn) = (gpfn);
   21.25 +
    21.26 +#define __get_phys_to_machine(m, host_gpfn, gpfn) do {    \
    21.27 +    if ((m)->shadow_mode == SHM_full_32)                   \
    21.28 +        (host_gpfn) = phys_to_machine_mapping[(gpfn)];     \
    21.29 +    else                                                   \
    21.30 +        (host_gpfn) = (gpfn); } while (0)
   21.31 +
   21.32  extern void __shadow_mode_disable(struct domain *d);
   21.33  static inline void shadow_mode_disable(struct domain *d)
   21.34  {
   21.35 @@ -46,8 +64,14 @@ static inline void shadow_mode_disable(s
   21.36  
   21.37  extern unsigned long shadow_l2_table( 
   21.38      struct mm_struct *m, unsigned long gpfn);
   21.39 +  
   21.40 +static inline void shadow_invalidate(struct mm_struct *m) {
   21.41 +    if (m->shadow_mode != SHM_full_32)
   21.42 +        BUG();
   21.43 +    memset(m->shadow_vtable, 0, PAGE_SIZE);
   21.44 +}
   21.45  
   21.46 -#define SHADOW_DEBUG      0
   21.47 +#define SHADOW_DEBUG 0
   21.48  #define SHADOW_HASH_DEBUG 0
   21.49  
   21.50  struct shadow_status {
   21.51 @@ -80,9 +104,55 @@ printk("DOM%u: (file=shadow.c, line=%d) 
   21.52      printk("DOM%u: (file=shadow.c, line=%d) " _f "\n",  \
   21.53             current->id , __LINE__ , ## _a )
   21.54  #else
   21.55 -#define SH_VVLOG(_f, _a...) 
   21.56 +#define SH_VVLOG(_f, _a...)
   21.57  #endif
   21.58  
   21.59 +static inline void __shadow_get_pl2e(struct mm_struct *m, 
   21.60 +                                unsigned long va, unsigned long *sl2e)
   21.61 +{
   21.62 +    if (m->shadow_mode == SHM_full_32) {
   21.63 +        *sl2e = l2_pgentry_val(m->shadow_vtable[va >> L2_PAGETABLE_SHIFT]);
   21.64 +    }
   21.65 +    else
   21.66 +        *sl2e = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
   21.67 +}
   21.68 +
   21.69 +static inline void __shadow_set_pl2e(struct mm_struct *m, 
   21.70 +                                unsigned long va, unsigned long value)
   21.71 +{
   21.72 +    if (m->shadow_mode == SHM_full_32) {
   21.73 +        m->shadow_vtable[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(value);
   21.74 +    }
   21.75 +    else
   21.76 +        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(value);
   21.77 +}
   21.78 +
   21.79 +static inline void __guest_get_pl2e(struct mm_struct *m, 
   21.80 +                                unsigned long va, unsigned long *l2e)
   21.81 +{
   21.82 +    if (m->shadow_mode == SHM_full_32) {
   21.83 +        *l2e = l2_pgentry_val(m->vpagetable[va >> L2_PAGETABLE_SHIFT]);
   21.84 +    }
   21.85 +    else
   21.86 +        *l2e = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
   21.87 +}
   21.88 +
   21.89 +static inline void __guest_set_pl2e(struct mm_struct *m, 
   21.90 +                                unsigned long va, unsigned long value)
   21.91 +{
   21.92 +    if (m->shadow_mode == SHM_full_32) {
   21.93 +        unsigned long pfn;
   21.94 +
   21.95 +        pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
    21.96 +        m->guest_pl2e_cache[va >> L2_PAGETABLE_SHIFT] =
    21.97 +            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   21.98 +
   21.99 +        m->vpagetable[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(value);
  21.100 +    }
  21.101 +    else
  21.102 +        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(value);
  21.103 +
  21.104 +}
  21.105  
  21.106  /************************************************************************/
  21.107  
  21.108 @@ -151,7 +221,6 @@ static inline void l1pte_write_fault(
  21.109      unsigned long spte = *spte_p;
  21.110  
  21.111      ASSERT(gpte & _PAGE_RW);
  21.112 -
  21.113      gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
  21.114  
  21.115      switch ( m->shadow_mode )
  21.116 @@ -163,9 +232,19 @@ static inline void l1pte_write_fault(
  21.117      case SHM_logdirty:
  21.118          spte = gpte | _PAGE_RW;
  21.119          __mark_dirty(m, gpte >> PAGE_SHIFT);
   21.120 +        break;
  21.121 +    case SHM_full_32:
  21.122 +    {
  21.123 +        unsigned long host_pfn, host_gpte;
  21.124 +        
  21.125 +        host_pfn = phys_to_machine_mapping[gpte >> PAGE_SHIFT];
  21.126 +        host_gpte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  21.127 +        spte = host_gpte | _PAGE_RW;
  21.128 +    }
  21.129          break;
  21.130      }
  21.131  
  21.132 +    SH_VVLOG("updating spte=%lx gpte=%lx", spte, gpte);
  21.133      *gpte_p = gpte;
  21.134      *spte_p = spte;
  21.135  }
  21.136 @@ -187,6 +266,17 @@ static inline void l1pte_read_fault(
  21.137      case SHM_logdirty:
  21.138          spte = gpte & ~_PAGE_RW;
  21.139          break;
  21.140 +
  21.141 +    case SHM_full_32:
  21.142 +    {
  21.143 +        unsigned long host_pfn, host_gpte;
  21.144 +        
  21.145 +        host_pfn = phys_to_machine_mapping[gpte >> PAGE_SHIFT];
  21.146 +        host_gpte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  21.147 +        spte = (host_gpte & _PAGE_DIRTY) ? host_gpte : (host_gpte & ~_PAGE_RW);
  21.148 +    }
  21.149 +        break;
  21.150 +
  21.151      }
  21.152  
  21.153      *gpte_p = gpte;
  21.154 @@ -214,6 +304,20 @@ static inline void l1pte_propagate_from_
  21.155               (_PAGE_PRESENT|_PAGE_ACCESSED) )
  21.156              spte = gpte & ~_PAGE_RW;
  21.157          break;
  21.158 +
  21.159 +    case SHM_full_32:
  21.160 +    {
  21.161 +        unsigned long host_pfn, host_gpte;
  21.162 +        
  21.163 +        host_pfn = phys_to_machine_mapping[gpte >> PAGE_SHIFT];
  21.164 +        host_gpte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  21.165 +        spte = 0;
  21.166 +
  21.167 +        if ( (host_gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
  21.168 +             (_PAGE_PRESENT|_PAGE_ACCESSED) )
  21.169 +            spte = (host_gpte & _PAGE_DIRTY) ? host_gpte : (host_gpte & ~_PAGE_RW);
  21.170 +    }
  21.171 +        break;
  21.172      }
  21.173  
  21.174      *gpte_p = gpte;
  21.175 @@ -239,8 +343,12 @@ static inline void l2pde_general(
  21.176  
  21.177          /* Detect linear p.t. mappings and write-protect them. */
  21.178          if ( (frame_table[sl1pfn].u.inuse.type_info & PGT_type_mask) ==
  21.179 -             PGT_l2_page_table )
  21.180 -            spde = gpde & ~_PAGE_RW;
  21.181 +             PGT_l2_page_table ) 
  21.182 +        {
  21.183 +            if (m->shadow_mode != SHM_full_32)
  21.184 +                spde = gpde & ~_PAGE_RW;
   21.185 +        }
   21.186 +
  21.187      }
  21.188  
  21.189      *gpde_p = gpde;
  21.190 @@ -394,7 +502,7 @@ static inline void delete_shadow_status(
  21.191  
  21.192      head = hash_bucket(m, gpfn);
  21.193  
  21.194 -    SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b);
  21.195 +    SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, head);
  21.196      shadow_audit(m, 0);
  21.197  
  21.198      /* Match on head item? */
  21.199 @@ -469,7 +577,7 @@ static inline void set_shadow_status(
  21.200  
  21.201      x = head = hash_bucket(m, gpfn);
  21.202     
  21.203 -    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next);
  21.204 +    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, x, x->next);
  21.205      shadow_audit(m, 0);
  21.206  
  21.207      /*
  21.208 @@ -543,7 +651,72 @@ static inline void set_shadow_status(
  21.209   done:
  21.210      shadow_audit(m, 0);
  21.211  }
  21.212 +  
  21.213 +#ifdef CONFIG_VMX
  21.214 +#include <asm/domain_page.h>
  21.215  
  21.216 +static inline void vmx_update_shadow_state(
  21.217 +    struct mm_struct *mm, unsigned long gpfn, unsigned long spfn)
  21.218 +{
  21.219 +
  21.220 +    l2_pgentry_t *mpl2e = 0;
  21.221 +    l2_pgentry_t *gpl2e, *spl2e;
  21.222 +
  21.223 +    /* unmap the old mappings */
  21.224 +    if (mm->shadow_vtable)
  21.225 +        unmap_domain_mem(mm->shadow_vtable);
  21.226 +    if (mm->vpagetable)
  21.227 +        unmap_domain_mem(mm->vpagetable);
  21.228 +
  21.229 +    /* new mapping */
  21.230 +    mpl2e = (l2_pgentry_t *) 
  21.231 +        map_domain_mem(pagetable_val(mm->monitor_table));
  21.232 +
  21.233 +    mpl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  21.234 +        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
  21.235 +    __flush_tlb_one(SH_LINEAR_PT_VIRT_START);
  21.236 +
  21.237 +    spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
  21.238 +    gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
  21.239 +    memset(spl2e, 0, ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
  21.240 +
  21.241 +    mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
  21.242 +    mm->shadow_vtable = spl2e;
   21.243 +    mm->vpagetable = gpl2e; /* expect the guest to have cleaned this up */
  21.244 +    unmap_domain_mem(mpl2e);
  21.245 +}
  21.246 +
  21.247 +static inline void __shadow_mk_pagetable( struct mm_struct *mm )
  21.248 +{
  21.249 +    unsigned long gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
  21.250 +    unsigned long spfn;
   21.251 +    SH_VLOG("__shadow_mk_pagetable(gpfn=%08lx)", gpfn);
  21.252 +
  21.253 +    if (mm->shadow_mode == SHM_full_32) 
  21.254 +    {
  21.255 +        unsigned long guest_gpfn;
  21.256 +        guest_gpfn = machine_to_phys_mapping[gpfn];
  21.257 +
   21.258 +        SH_VVLOG("__shadow_mk_pagetable(guest_gpfn=%08lx, gpfn=%08lx)",
  21.259 +                 guest_gpfn, gpfn);
  21.260 +
  21.261 +        spfn = __shadow_status(mm, gpfn) & PSH_pfn_mask;
  21.262 +        if ( unlikely(spfn == 0) ) {
  21.263 +            spfn = shadow_l2_table(mm, gpfn);
  21.264 +            mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
  21.265 +        } else {
  21.266 +            vmx_update_shadow_state(mm, gpfn, spfn);
  21.267 +        }
  21.268 +    } else {
  21.269 +        spfn = __shadow_status(mm, gpfn) & PSH_pfn_mask;
  21.270 +
  21.271 +        if ( unlikely(spfn == 0) ) {
  21.272 +            spfn = shadow_l2_table(mm, gpfn);
  21.273 +        }
  21.274 +        mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
  21.275 +    }
  21.276 +}
  21.277 +#else
  21.278  static inline void __shadow_mk_pagetable(struct mm_struct *mm)
  21.279  {
  21.280      unsigned long gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
  21.281 @@ -554,22 +727,26 @@ static inline void __shadow_mk_pagetable
  21.282  
  21.283      mm->shadow_table = mk_pagetable(spfn << PAGE_SHIFT);
  21.284  }
  21.285 +#endif /* CONFIG_VMX */
  21.286  
  21.287  static inline void shadow_mk_pagetable(struct mm_struct *mm)
  21.288  {
  21.289 -    SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
  21.290 -             pagetable_val(mm->pagetable), mm->shadow_mode );
   21.291 +    if ( unlikely(mm->shadow_mode) )
   21.292 +    {
   21.293 +        SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
   21.294 +                 pagetable_val(mm->pagetable), mm->shadow_mode );
  21.295  
  21.296 -    if ( unlikely(mm->shadow_mode) )
  21.297 -    {
  21.298 -        shadow_lock(mm);
  21.299 -        __shadow_mk_pagetable(mm);
  21.300 -        shadow_unlock(mm);
  21.301 -    }
   21.302 +        shadow_lock(mm);
   21.303 +        __shadow_mk_pagetable(mm);
   21.304 +        shadow_unlock(mm);
  21.305  
  21.306 -    SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
  21.307 -             pagetable_val(mm->pagetable), mm->shadow_mode, 
  21.308 -             pagetable_val(mm->shadow_table) );
   21.309 +        SH_VVLOG("leaving shadow_mk_pagetable:");
   21.310 +
   21.311 +        SH_VVLOG("( gptbase=%08lx, mode=%d ) sh=%08lx",
   21.312 +                 pagetable_val(mm->pagetable), mm->shadow_mode,
   21.313 +                 pagetable_val(mm->shadow_table) );
   21.314 +
   21.315 +    }
  21.316  }
  21.317  
  21.318  #if SHADOW_DEBUG
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/xen/include/asm-x86/vmx.h	Wed Dec 15 23:09:11 2004 +0000
    22.3 @@ -0,0 +1,251 @@
    22.4 +/*
    22.5 + * vmx.h: VMX Architecture related definitions
    22.6 + * Copyright (c) 2004, Intel Corporation.
    22.7 + *
    22.8 + * This program is free software; you can redistribute it and/or modify it
    22.9 + * under the terms and conditions of the GNU General Public License,
   22.10 + * version 2, as published by the Free Software Foundation.
   22.11 + *
   22.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   22.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   22.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   22.15 + * more details.
   22.16 + *
   22.17 + * You should have received a copy of the GNU General Public License along with
   22.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   22.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   22.20 + *
   22.21 + */
   22.22 +#ifndef __ASM_X86_VMX_H__
   22.23 +#define __ASM_X86_VMX_H__
   22.24 +
   22.25 +#include <xen/sched.h>
   22.26 +#include <asm/types.h>
   22.27 +#include <asm/regs.h>
   22.28 +#include <asm/processor.h>
   22.29 +#include <asm/vmx_vmcs.h>
   22.30 +
   22.31 +extern void vmx_asm_vmexit_handler(struct xen_regs);
   22.32 +extern void vmx_asm_do_resume(void);
   22.33 +extern void vmx_asm_do_launch(void);
   22.34 +extern void vmx_intr_assist(struct exec_domain *d);
   22.35 +
   22.36 +extern void arch_vmx_do_launch(struct exec_domain *);
   22.37 +extern void arch_vmx_do_resume(struct exec_domain *);
   22.38 +
   22.39 +extern int vmcs_size;
   22.40 +extern unsigned int cpu_rev;
   22.41 +
   22.42 +/*
   22.43 + * Need fill bits for SENTER
   22.44 + */
   22.45 +
   22.46 +#define MONITOR_PIN_BASED_EXEC_CONTROLS         0x0000001f      
   22.47 +#define MONITOR_CPU_BASED_EXEC_CONTROLS         0x0581e7f2
   22.48 +#define MONITOR_VM_EXIT_CONTROLS                0x0003edff
   22.49 +#define MONITOR_VM_ENTRY_CONTROLS               0x000011ff
   22.50 +
   22.51 +/*
   22.52 + * Exit Reasons
   22.53 + */
   22.54 +#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
   22.55 +
   22.56 +#define EXIT_REASON_EXCEPTION_NMI       0
   22.57 +#define EXIT_REASON_EXTERNAL_INTERRUPT  1
   22.58 +
   22.59 +#define EXIT_REASON_PENDING_INTERRUPT   7
   22.60 +
   22.61 +#define EXIT_REASON_TASK_SWITCH         9
   22.62 +#define EXIT_REASON_CPUID               10
   22.63 +#define EXIT_REASON_HLT                 12
   22.64 +#define EXIT_REASON_INVLPG              14
   22.65 +#define EXIT_REASON_RDPMC               15
   22.66 +#define EXIT_REASON_RDTSC               16
   22.67 +#define EXIT_REASON_VMCALL              18
   22.68 +
   22.69 +#define EXIT_REASON_CR_ACCESS           28
   22.70 +#define EXIT_REASON_DR_ACCESS           29
   22.71 +#define EXIT_REASON_IO_INSTRUCTION      30
   22.72 +#define EXIT_REASON_MSR_READ            31
   22.73 +#define EXIT_REASON_MSR_WRITE           32
   22.74 +#define EXIT_REASON_MWAIT_INSTRUCTION   36
   22.75 +
   22.76 +/*
   22.77 + * Interruption-information format
   22.78 + */
   22.79 +#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
   22.80 +#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
   22.81 +#define INTR_INFO_DELIEVER_CODE_MASK    0x800           /* 11 */
   22.82 +#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
   22.83 +
   22.84 +#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
   22.85 +#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
   22.86 +
   22.87 +/*
   22.88 + * Exit Qualifications for MOV for Control Register Access
   22.89 + */
   22.90 +#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
   22.91 +#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
   22.92 +#define TYPE_MOV_TO_CR                  (0 << 4) 
   22.93 +#define TYPE_MOV_FROM_CR                (1 << 4)
   22.94 +#define TYPE_CLTS                       (2 << 4)
   22.95 +#define CONTROL_REG_ACCESS_REG          0x700   /* 10:8, general purpose register */
   22.96 +#define REG_EAX                         (0 << 8) 
   22.97 +#define REG_ECX                         (1 << 8) 
   22.98 +#define REG_EDX                         (2 << 8) 
   22.99 +#define REG_EBX                         (3 << 8) 
  22.100 +#define REG_ESP                         (4 << 8) 
  22.101 +#define REG_EBP                         (5 << 8) 
  22.102 +#define REG_ESI                         (6 << 8) 
  22.103 +#define REG_EDI                         (7 << 8) 
  22.104 +
  22.105 +/*
  22.106 + * Exit Qualifications for MOV for Debug Register Access
  22.107 + */
  22.108 +#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
  22.109 +#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
  22.110 +#define TYPE_MOV_TO_DR                  (0 << 4) 
  22.111 +#define TYPE_MOV_FROM_DR                (1 << 4)
   22.112 +#define DEBUG_REG_ACCESS_REG            0x700   /* 10:8, general purpose register */
  22.113 + 
  22.114 +#define EXCEPTION_BITMAP_DE     (1 << 0)        /* Divide Error */
  22.115 +#define EXCEPTION_BITMAP_DB     (1 << 1)        /* Debug */
  22.116 +#define EXCEPTION_BITMAP_NMI    (1 << 2)        /* NMI */
  22.117 +#define EXCEPTION_BITMAP_BP     (1 << 3)        /* Breakpoint */
  22.118 +#define EXCEPTION_BITMAP_OF     (1 << 4)        /* Overflow */
  22.119 +#define EXCEPTION_BITMAP_BR     (1 << 5)        /* BOUND Range Exceeded */
  22.120 +#define EXCEPTION_BITMAP_UD     (1 << 6)        /* Invalid Opcode */
  22.121 +#define EXCEPTION_BITMAP_NM     (1 << 7)        /* Device Not Available */
  22.122 +#define EXCEPTION_BITMAP_DF     (1 << 8)        /* Double Fault */
  22.123 +/* reserved */
  22.124 +#define EXCEPTION_BITMAP_TS     (1 << 10)       /* Invalid TSS */
  22.125 +#define EXCEPTION_BITMAP_NP     (1 << 11)       /* Segment Not Present */
  22.126 +#define EXCEPTION_BITMAP_SS     (1 << 12)       /* Stack-Segment Fault */
  22.127 +#define EXCEPTION_BITMAP_GP     (1 << 13)       /* General Protection */
  22.128 +#define EXCEPTION_BITMAP_PG     (1 << 14)       /* Page Fault */
  22.129 +#define EXCEPTION_BITMAP_MF     (1 << 16)       /* x87 FPU Floating-Point Error (Math Fault)  */
  22.130 +#define EXCEPTION_BITMAP_AC     (1 << 17)       /* Alignment Check */
  22.131 +#define EXCEPTION_BITMAP_MC     (1 << 18)       /* Machine Check */
  22.132 +#define EXCEPTION_BITMAP_XF     (1 << 19)       /* SIMD Floating-Point Exception */
  22.133 +
  22.134 +#ifdef XEN_DEBUGGER
  22.135 +#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
  22.136 +    ( EXCEPTION_BITMAP_PG |                     \
  22.137 +      EXCEPTION_BITMAP_DB |                     \
  22.138 +      EXCEPTION_BITMAP_BP |                     \
  22.139 +      EXCEPTION_BITMAP_GP )
  22.140 +#else
  22.141 +#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
  22.142 +    ( EXCEPTION_BITMAP_PG |                     \
  22.143 +      EXCEPTION_BITMAP_GP )
  22.144 +#endif
  22.145 +
  22.146 +#define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
  22.147 +#define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
  22.148 +#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
  22.149 +#define VMPTRLD_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /6 */
  22.150 +#define VMPTRST_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /7 */
  22.151 +#define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
  22.152 +#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
  22.153 +#define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
  22.154 +#define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
  22.155 +#define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
  22.156 +
  22.157 +#define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
  22.158 +#define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
   22.159 +#define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX (register operands) */
  22.160 +
  22.161 +static inline int __vmptrld (u64 addr)
  22.162 +{
  22.163 +    unsigned long eflags;
  22.164 +    __asm__ __volatile__ ( VMPTRLD_OPCODE
  22.165 +                           MODRM_EAX_06
  22.166 +                           :
  22.167 +                           : "a" (&addr) 
  22.168 +                           : "memory");
  22.169 +
  22.170 +    __save_flags(eflags);
  22.171 +    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
  22.172 +        return -1;
  22.173 +    return 0;
  22.174 +}
  22.175 +
  22.176 +static inline void __vmptrst (u64 addr)
  22.177 +{
  22.178 +    __asm__ __volatile__ ( VMPTRST_OPCODE
  22.179 +                           MODRM_EAX_07
  22.180 +                           :
  22.181 +                           : "a" (&addr) 
  22.182 +                           : "memory");
  22.183 +}
  22.184 +
  22.185 +static inline int __vmpclear (u64 addr)
  22.186 +{
  22.187 +    unsigned long eflags;
  22.188 +
  22.189 +    __asm__ __volatile__ ( VMCLEAR_OPCODE
  22.190 +                           MODRM_EAX_06
  22.191 +                           :
  22.192 +                           : "a" (&addr) 
  22.193 +                           : "memory");
  22.194 +    __save_flags(eflags);
  22.195 +    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
  22.196 +        return -1;
  22.197 +    return 0;
  22.198 +}
  22.199 +
  22.200 +static inline int __vmread (unsigned int field, void *value)
  22.201 +{
  22.202 +    unsigned long eflags;
  22.203 +    unsigned long ecx = 0;
  22.204 +
  22.205 +    __asm__ __volatile__ ( VMREAD_OPCODE
  22.206 +                           MODRM_EAX_ECX       
  22.207 +                           : "=c" (ecx)
  22.208 +                           : "a" (field)
  22.209 +                           : "memory");
  22.210 +
  22.211 +    *((long *) value) = ecx;
  22.212 +
  22.213 +    __save_flags(eflags);
  22.214 +    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
  22.215 +        return -1;
  22.216 +    return 0;
  22.217 +}
  22.218 +
  22.219 +static inline int __vmwrite (unsigned int field, unsigned int value)
  22.220 +{
  22.221 +    unsigned long eflags;
  22.222 +
  22.223 +    __asm__ __volatile__ ( VMWRITE_OPCODE
  22.224 +                           MODRM_EAX_ECX       
  22.225 +                           :
  22.226 +                           : "a" (field) , "c" (value)
  22.227 +                           : "memory");
  22.228 +    __save_flags(eflags);
  22.229 +    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
  22.230 +        return -1;
  22.231 +    return 0;
  22.232 +}
  22.233 +
  22.234 +static inline void __vmxoff (void)
  22.235 +{
  22.236 +    __asm__ __volatile__ ( VMXOFF_OPCODE 
  22.237 +                           ::: "memory");
  22.238 +}
  22.239 +
  22.240 +static inline int __vmxon (u64 addr)
  22.241 +{
  22.242 +    unsigned long eflags;
  22.243 +
  22.244 +    __asm__ __volatile__ ( VMXON_OPCODE
  22.245 +                           MODRM_EAX_06
  22.246 +                           :
  22.247 +                           : "a" (&addr) 
  22.248 +                           : "memory");
  22.249 +    __save_flags(eflags);
  22.250 +    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
  22.251 +        return -1;
  22.252 +    return 0;
  22.253 +}
  22.254 +#endif /* __ASM_X86_VMX_H__ */
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/xen/include/asm-x86/vmx_cpu.h	Wed Dec 15 23:09:11 2004 +0000
    23.3 @@ -0,0 +1,35 @@
    23.4 +/*
    23.5 + * vmx_cpu.h: Virtual CPU state
    23.6 + * Copyright (c) 2004, Intel Corporation.
    23.7 + *
    23.8 + * This program is free software; you can redistribute it and/or modify it
    23.9 + * under the terms and conditions of the GNU General Public License,
   23.10 + * version 2, as published by the Free Software Foundation.
   23.11 + *
   23.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   23.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   23.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   23.15 + * more details.
   23.16 + *
   23.17 + * You should have received a copy of the GNU General Public License along with
   23.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   23.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   23.20 + *
   23.21 + */
    23.22 +#ifndef __ASM_X86_VMX_CPU_H__
    23.23 +#define __ASM_X86_VMX_CPU_H__
   23.24 +
   23.25 +/*
   23.26 + * Virtual CPU
   23.27 + */
   23.28 +struct arch_state_struct {
   23.29 +    unsigned long       mode_flags; /* vm86, 32-bit, 64-bit, etc. */
   23.30 +    /* debug registers */
   23.31 +    /* MSRs */
   23.32 +};
   23.33 +
   23.34 +#define VMX_MF_VM86     0
   23.35 +#define VMX_MF_32       1
   23.36 +#define VMX_MF_64       2
   23.37 +
    23.38 +#endif /* __ASM_X86_VMX_CPU_H__ */
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/xen/include/asm-x86/vmx_platform.h	Wed Dec 15 23:09:11 2004 +0000
    24.3 @@ -0,0 +1,24 @@
    24.4 +/*
    24.5 + * vmx_platform.h: VMX platform support
    24.6 + * Copyright (c) 2004, Intel Corporation.
    24.7 + *
    24.8 + * This program is free software; you can redistribute it and/or modify it
    24.9 + * under the terms and conditions of the GNU General Public License,
   24.10 + * version 2, as published by the Free Software Foundation.
   24.11 + *
   24.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   24.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   24.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   24.15 + * more details.
   24.16 + *
   24.17 + * You should have received a copy of the GNU General Public License along with
   24.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   24.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   24.20 + *
   24.21 + */
   24.22 +#ifndef __ASM_X86_VMX_PLATFORM_H__
   24.23 +#define __ASM_X86_VMX_PLATFORM_H__
   24.24 +
   24.25 +#include <asm/e820.h>		/* from Linux */
   24.26 +
   24.27 +#endif
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/include/asm-x86/vmx_vmcs.h	Wed Dec 15 23:09:11 2004 +0000
    25.3 @@ -0,0 +1,225 @@
    25.4 +/*
    25.5 + * vmx_vmcs.h: VMCS related definitions
    25.6 + * Copyright (c) 2004, Intel Corporation.
    25.7 + *
    25.8 + * This program is free software; you can redistribute it and/or modify it
    25.9 + * under the terms and conditions of the GNU General Public License,
   25.10 + * version 2, as published by the Free Software Foundation.
   25.11 + *
   25.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   25.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   25.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   25.15 + * more details.
   25.16 + *
   25.17 + * You should have received a copy of the GNU General Public License along with
   25.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   25.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   25.20 + *
   25.21 + */
   25.22 +#ifndef __ASM_X86_VMX_VMCS_H__
   25.23 +#define __ASM_X86_VMX_VMCS_H__
   25.24 +
   25.25 +#include <asm/config.h>
   25.26 +#include <asm/vmx_cpu.h>
   25.27 +#include <asm/vmx_platform.h>
   25.28 +
   25.29 +extern int start_vmx(void);
   25.30 +extern void stop_vmx(void);
   25.31 +
   25.32 +void vmx_enter_scheduler(void);
   25.33 +
   25.34 +union vmcs_arbytes {
   25.35 +    struct arbyte_fields {
    25.36 +        unsigned int seg_type:4, s:1, dpl:2, p:1,
    25.37 +                     reserved0:4, avl:1,
    25.38 +                     reserved1:1, default_ops_size:1,
    25.39 +                     g:1, null_bit:1,
    25.40 +                     reserved2:15;
   25.41 +    }  __attribute__((packed)) fields;
   25.42 +    unsigned int bytes;
   25.43 +};
   25.44 +
    25.45 +struct virtual_platform_def {
   25.46 +    unsigned long   *real_mode_data; /* E820, etc. */
   25.47 +    unsigned long   shared_page_va;
   25.48 +};
   25.49 +
   25.50 +int vmx_setup_platform(struct exec_domain *, execution_context_t *);
   25.51 +
   25.52 +#define VMX_CPU_STATE_PG_ENABLED        0       
   25.53 +
   25.54 +#define VMCS_SIZE                       0x1000
   25.55 +
   25.56 +struct vmcs_struct {
   25.57 +    u32 vmcs_revision_id;
    25.58 +    unsigned char data[VMCS_SIZE - sizeof(u32)];
   25.59 +};
   25.60 +
   25.61 +struct arch_vmx_struct {
   25.62 +    struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual */
   25.63 +    unsigned long           flags;  /* VMCS flags */
   25.64 +    unsigned long           cpu_cr2; /* save CR2 */
   25.65 +    unsigned long           cpu_cr3;
   25.66 +    unsigned long           cpu_state;
    25.67 +    struct virtual_platform_def     vmx_platform;
   25.68 +#if 0
   25.69 +    /* open */
   25.70 +    unsigned long *page_list; /* page list for MMIO */
   25.71 +#endif
   25.72 +};
   25.73 +
   25.74 +#define vmx_schedule_tail(next)         \
   25.75 +    (next)->thread.arch_vmx.arch_vmx_schedule_tail((next))
   25.76 +
   25.77 +#define VMX_DOMAIN(d)   d->thread.arch_vmx.flags
   25.78 +
   25.79 +#define ARCH_VMX_VMCS_LOADED    0       /* VMCS has been loaded and active */
   25.80 +#define ARCH_VMX_VMCS_LAUNCH    1       /* Needs VMCS launch */
   25.81 +#define ARCH_VMX_VMCS_RESUME    2       /* Needs VMCS resume */
   25.82 +#define ARCH_VMX_IO_WAIT        3       /* Waiting for I/O completion */
   25.83 +
   25.84 +void vmx_do_launch(struct exec_domain *); 
   25.85 +void vmx_do_resume(struct exec_domain *); 
   25.86 +
   25.87 +struct vmcs_struct *alloc_vmcs(void);
   25.88 +void free_vmcs(struct vmcs_struct *);
   25.89 +int  load_vmcs(struct arch_vmx_struct *, u64);
   25.90 +int  store_vmcs(struct arch_vmx_struct *, u64);
   25.91 +void dump_vmcs(void);
   25.92 +int  construct_vmcs(struct arch_vmx_struct *, execution_context_t *, 
   25.93 +                    full_execution_context_t *, int);
   25.94 +
   25.95 +#define VMCS_USE_HOST_ENV       1
   25.96 +#define VMCS_USE_SEPARATE_ENV   0
   25.97 +
   25.98 +#define VMCS_EFLAGS_RESERVED_0          0xffc08028 /* bitmap for 0 */
   25.99 +#define VMCS_EFLAGS_RESERVED_1          0x00000002 /* bitmap for 1 */
  25.100 +
  25.101 +extern int vmcs_version;
  25.102 +
   25.103 +/* VMCS Encodings */
  25.104 +enum vmcs_field {
  25.105 +    GUEST_ES_SELECTOR               = 0x00000800,
  25.106 +    GUEST_CS_SELECTOR               = 0x00000802,
  25.107 +    GUEST_SS_SELECTOR               = 0x00000804,
  25.108 +    GUEST_DS_SELECTOR               = 0x00000806,
  25.109 +    GUEST_FS_SELECTOR               = 0x00000808,
  25.110 +    GUEST_GS_SELECTOR               = 0x0000080a,
  25.111 +    GUEST_LDTR_SELECTOR             = 0x0000080c,
  25.112 +    GUEST_TR_SELECTOR               = 0x0000080e,
  25.113 +    HOST_ES_SELECTOR                = 0x00000c00,
  25.114 +    HOST_CS_SELECTOR                = 0x00000c02,
  25.115 +    HOST_SS_SELECTOR                = 0x00000c04,
  25.116 +    HOST_DS_SELECTOR                = 0x00000c06,
  25.117 +    HOST_FS_SELECTOR                = 0x00000c08,
  25.118 +    HOST_GS_SELECTOR                = 0x00000c0a,
  25.119 +    HOST_TR_SELECTOR                = 0x00000c0c,
  25.120 +    IO_BITMAP_A                     = 0x00002000, 
  25.121 +    IO_BITMAP_B                     = 0x00002002, 
  25.122 +    VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
  25.123 +    VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
  25.124 +    VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
  25.125 +    TSC_OFFSET                      = 0x00002010,
  25.126 +    GUEST_VMCS0                     = 0x00002800,
  25.127 +    GUEST_VMCS1                     = 0x00002801,
  25.128 +    GUEST_IA32_DEBUGCTL             = 0x00002802,
  25.129 +    PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
  25.130 +    CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,   
  25.131 +    EXCEPTION_BITMAP                = 0x00004004,
  25.132 +    PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
  25.133 +    PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
  25.134 +    CR3_TARGET_COUNT                = 0x0000400a,
  25.135 +    VM_EXIT_CONTROLS                = 0x0000400c,
  25.136 +    VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
  25.137 +    VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
  25.138 +    VM_ENTRY_CONTROLS               = 0x00004012,
  25.139 +    VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
  25.140 +    VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
  25.141 +    VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
  25.142 +    VM_EXIT_REASON                  = 0x00004402,
  25.143 +    VM_EXIT_INTR_INFO               = 0x00004404,   
  25.144 +    VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
  25.145 +    IDT_VECTORING_INFO_FIELD        = 0x00004408,
  25.146 +    IDT_VECTORING_ERROR_CODE        = 0x0000440a,
  25.147 +    INSTRUCTION_LEN                 = 0x0000440c,
  25.148 +    GUEST_ES_LIMIT                  = 0x00004800,
  25.149 +    GUEST_CS_LIMIT                  = 0x00004802,
  25.150 +    GUEST_SS_LIMIT                  = 0x00004804,
  25.151 +    GUEST_DS_LIMIT                  = 0x00004806,
  25.152 +    GUEST_FS_LIMIT                  = 0x00004808,
  25.153 +    GUEST_GS_LIMIT                  = 0x0000480a,
  25.154 +    GUEST_LDTR_LIMIT                = 0x0000480c,
  25.155 +    GUEST_TR_LIMIT                  = 0x0000480e,
  25.156 +    GUEST_GDTR_LIMIT                = 0x00004810,
  25.157 +    GUEST_IDTR_LIMIT                = 0x00004812,
  25.158 +    GUEST_ES_AR_BYTES               = 0x00004814,
  25.159 +    GUEST_CS_AR_BYTES               = 0x00004816,
  25.160 +    GUEST_SS_AR_BYTES               = 0x00004818,
  25.161 +    GUEST_DS_AR_BYTES               = 0x0000481a,
  25.162 +    GUEST_FS_AR_BYTES               = 0x0000481c,
  25.163 +    GUEST_GS_AR_BYTES               = 0x0000481e,
  25.164 +    GUEST_LDTR_AR_BYTES             = 0x00004820,
  25.165 +    GUEST_TR_AR_BYTES               = 0x00004822,
  25.166 +    GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
  25.167 +    CR0_GUEST_HOST_MASK             = 0x00006000,
  25.168 +    CR4_GUEST_HOST_MASK             = 0x00006002,
  25.169 +    CR0_READ_SHADOW                 = 0x00006004,
  25.170 +    CR4_READ_SHADOW                 = 0x00006006,
  25.171 +    CR3_TARGET_VALUES               = 0x00006008, 
  25.172 +    CR3_GUEST_HOST_MASK             = 0x00006208,
  25.173 +    EXIT_QUALIFICATION              = 0x00006400,
  25.174 +    GUEST_CR0                       = 0x00006800,
  25.175 +    GUEST_CR3                       = 0x00006802,
  25.176 +    GUEST_CR4                       = 0x00006804,
  25.177 +    GUEST_ES_BASE                   = 0x00006806,
  25.178 +    GUEST_CS_BASE                   = 0x00006808,
  25.179 +    GUEST_SS_BASE                   = 0x0000680a,
  25.180 +    GUEST_DS_BASE                   = 0x0000680c,
  25.181 +    GUEST_FS_BASE                   = 0x0000680e,
  25.182 +    GUEST_GS_BASE                   = 0x00006810,
  25.183 +    GUEST_LDTR_BASE                 = 0x00006812,
  25.184 +    GUEST_TR_BASE                   = 0x00006814,
  25.185 +    GUEST_GDTR_BASE                 = 0x00006816,    
  25.186 +    GUEST_IDTR_BASE                 = 0x00006818,
  25.187 +    GUEST_DR7                       = 0x0000681a,
  25.188 +    GUEST_ESP                       = 0x0000681c,
  25.189 +    GUEST_EIP                       = 0x0000681e,
  25.190 +    GUEST_EFLAGS                    = 0x00006820,
  25.191 +    GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
  25.192 +    HOST_CR0                        = 0x00006c00,
  25.193 +    HOST_CR3                        = 0x00006c02,
  25.194 +    HOST_CR4                        = 0x00006c04,
  25.195 +    HOST_FS_BASE                    = 0x00006c06,
  25.196 +    HOST_GS_BASE                    = 0x00006c08,
  25.197 +    HOST_TR_BASE                    = 0x00006c0a,
  25.198 +    HOST_GDTR_BASE                  = 0x00006c0c,
  25.199 +    HOST_IDTR_BASE                  = 0x00006c0e,
  25.200 +    HOST_ESP                        = 0x00006c14,
  25.201 +    HOST_EIP                        = 0x00006c16,
  25.202 +};
  25.203 +
  25.204 +#define VMX_DEBUG 1
  25.205 +#if VMX_DEBUG
  25.206 +#define DBG_LEVEL_0     (1 << 0)
  25.207 +#define DBG_LEVEL_1     (1 << 1)
  25.208 +#define DBG_LEVEL_2     (1 << 2)
  25.209 +#define DBG_LEVEL_3     (1 << 3)
  25.210 +#define DBG_LEVEL_IO    (1 << 4)
  25.211 +#define DBG_LEVEL_VMMU  (1 << 5)
  25.212 +
  25.213 +extern unsigned int opt_vmx_debug_level;
  25.214 +#define VMX_DBG_LOG(level, _f, _a...)           \
  25.215 +    if ((level) & opt_vmx_debug_level)          \
  25.216 +        printk("[VMX]" _f "\n", ## _a )
  25.217 +#else
  25.218 +#define VMX_DBG_LOG(level, _f, _a...)
  25.219 +#endif
  25.220 +
  25.221 +#define  __vmx_bug(regs)                                        \
  25.222 +    do {                                                        \
  25.223 +        printk("__vmx_bug at %s:%d\n", __FILE__, __LINE__);     \
  25.224 +        show_registers(regs);                                   \
  25.225 +        domain_crash();                                         \
  25.226 +    } while (0)
  25.227 +
   25.228 +#endif /* __ASM_X86_VMX_VMCS_H__ */
    26.1 --- a/xen/include/public/arch-x86_32.h	Wed Dec 15 18:19:36 2004 +0000
    26.2 +++ b/xen/include/public/arch-x86_32.h	Wed Dec 15 23:09:11 2004 +0000
    26.3 @@ -114,6 +114,7 @@ typedef u64 tsc_timestamp_t; /* RDTSC ti
    26.4   */
    26.5  typedef struct {
    26.6  #define ECF_I387_VALID (1<<0)
     26.7 +#define ECF_VMX_GUEST  (1<<1)
    26.8      unsigned long flags;
    26.9      execution_context_t cpu_ctxt;           /* User-level CPU registers     */
   26.10      char          fpu_ctxt[256];            /* User-level FPU registers     */
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/include/public/io/ioreq.h	Wed Dec 15 23:09:11 2004 +0000
    27.3 @@ -0,0 +1,59 @@
    27.4 +/*
    27.5 + * ioreq.h: I/O request definitions for device models
    27.6 + * Copyright (c) 2004, Intel Corporation.
    27.7 + *
    27.8 + * This program is free software; you can redistribute it and/or modify it
    27.9 + * under the terms and conditions of the GNU General Public License,
   27.10 + * version 2, as published by the Free Software Foundation.
   27.11 + *
   27.12 + * This program is distributed in the hope it will be useful, but WITHOUT
   27.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   27.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   27.15 + * more details.
   27.16 + *
   27.17 + * You should have received a copy of the GNU General Public License along with
   27.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   27.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   27.20 + *
   27.21 + */
   27.22 +
   27.23 +#ifndef _IOREQ_H_
   27.24 +#define _IOREQ_H_
   27.25 +
   27.26 +#define IOREQ_READ      1
   27.27 +#define IOREQ_WRITE     0
   27.28 +
   27.29 +#define STATE_INVALID           0
   27.30 +#define STATE_IOREQ_READY       1
   27.31 +#define STATE_IOREQ_INPROCESS   2
   27.32 +#define STATE_IORESP_READY      3
   27.33 +
   27.34 +#define IOPACKET_PORT   2
   27.35 +
    27.36 +/* The VMExit dispatcher cooperates with the instruction decoder to
    27.37 +   prepare this structure, then notifies the service OS and the DM
    27.38 +   by sending a virq. */
   27.39 +typedef struct {
   27.40 +    u64     addr;               /*  physical address            */
   27.41 +    u64     size;               /*  size in bytes               */
   27.42 +    u64     count;		/*  for rep prefixes            */
   27.43 +    union {
   27.44 +        u64     data;           /*  data                        */
   27.45 +        void    *pdata;         /*  pointer to data             */
   27.46 +    } u;
   27.47 +    u8      state:5;
   27.48 +    u8      pdata_valid:1;	/* if 1, use pdata above        */
   27.49 +    u8      dir:1;		/*  1=read, 0=write             */
   27.50 +    u8      port_mm:1;		/*  0=portio, 1=mmio            */
   27.51 +} ioreq_t;
   27.52 +
   27.53 +#define MAX_VECTOR    256
   27.54 +#define BITS_PER_BYTE   8
   27.55 +#define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(unsigned long)))
   27.56 +
   27.57 +typedef struct {
   27.58 +    ioreq_t         vp_ioreq;
   27.59 +    unsigned long   vp_intr[INTR_LEN];
   27.60 +} vcpu_iodata_t;
   27.61 +
   27.62 +#endif /* _IOREQ_H_ */
    28.1 --- a/xen/include/xen/sched.h	Wed Dec 15 18:19:36 2004 +0000
    28.2 +++ b/xen/include/xen/sched.h	Wed Dec 15 23:09:11 2004 +0000
    28.3 @@ -26,8 +26,6 @@
    28.4  extern unsigned long volatile jiffies;
    28.5  extern rwlock_t domlist_lock;
    28.6  
    28.7 -struct domain;
    28.8 -
    28.9  /* A global pointer to the initial domain (DOM0). */
   28.10  extern struct domain *dom0;
   28.11  
    29.1 --- a/xen/include/xen/types.h	Wed Dec 15 18:19:36 2004 +0000
    29.2 +++ b/xen/include/xen/types.h	Wed Dec 15 23:09:11 2004 +0000
    29.3 @@ -44,5 +44,7 @@ typedef         __u32           uint32_t
    29.4  typedef         __u64           uint64_t;
    29.5  
    29.6  
    29.7 +struct domain;
    29.8 +struct exec_domain;
    29.9  
   29.10  #endif /* __TYPES_H__ */