ia64/xen-unstable

changeset 5664:9b1866006aea

merge
Signed-off-by: andrew.warfield@cl.cam.ac.uk
author akw27@arcadians.cl.cam.ac.uk
date Mon Jul 04 15:35:35 2005 +0000 (2005-07-04)
parents f6e7c967212e e1fbb7fee1d8
children 8bd2e8933277
files docs/misc/hg-cheatsheet.txt linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c xen/arch/x86/domain.c xen/arch/x86/vmx.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c xen/include/asm-x86/msr.h xen/include/asm-x86/vmx_intercept.h xen/include/asm-x86/vmx_vmcs.h
line diff
     1.1 --- a/docs/misc/hg-cheatsheet.txt	Mon Jul 04 15:31:47 2005 +0000
     1.2 +++ b/docs/misc/hg-cheatsheet.txt	Mon Jul 04 15:35:35 2005 +0000
     1.3 @@ -3,7 +3,7 @@ Mercurial(hg) Cheatsheet for Xen
     1.4  ================================
     1.5  
     1.6  Written by Andrew Warfield, extended by Michael Fetterman and Ian Pratt
     1.7 -June 29, 2005
     1.8 +June 29, 2005, extended by Grzegorz Milos 04 July 2005.
     1.9  
    1.10  Overview
    1.11  --------
    1.12 @@ -121,7 +121,7 @@ without any merging of any kind.  "hg pu
    1.13  the current state of your working directory.  If you weren't already
    1.14  "updated" to your local repository's tip, you might be surprised to
    1.15  find yourself merging the results of the pull with a non-tip node in
    1.16 -your local repository.  
    1.17 +your local repository. 
    1.18  
    1.19  
    1.20  Revision History
    1.21 @@ -258,6 +258,11 @@ Generating a patch is easy,
    1.22  
    1.23  will generate a patch describing the diff between that changeset and 
    1.24  its parent.
    1.25 +    
     1.26 +To generate a patch between two specified revisions, use:
     1.27 +   hg diff -r A -r B [files]
     1.28 +NB: BK syntax -rA..B isn't supported by Hg.
    1.29 +
    1.30  
    1.31  Pushing changesets to a parent repository
    1.32  -----------------------------------------
    1.33 @@ -268,6 +273,12 @@ Pushes changes up to a parent. You can't
    1.34  repository off the web interface. In fact, you can currently only push
    1.35  to an ssh target -- filesystem drectory targets don't work, but this
    1.36  will be fixed soon.
     1.37 +For now it is possible to set up asymmetric pull/push paths: pulls can
     1.38 +be done via the web interface while pushes go via ssh. Example .hg/hgrc
     1.39 +config file:
    1.40 +  | [paths]
    1.41 +  | default = http://your.server/repository_name
    1.42 +  | default-push = ssh://[username@]your.server//repository_location
    1.43  
    1.44  
    1.45  Repository history
    1.46 @@ -384,7 +395,7 @@ Additional useful commands
    1.47  Shows the differences between whatever changeset you most recently
    1.48  checked out, and your current working directory:
    1.49  
    1.50 -   hg diff
    1.51 +   hg diff 
    1.52  
    1.53  View an annotated version of a source file:
    1.54  
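
Note on the cheatsheet additions above: they cover two separate workflows,
diffing between two named revisions and pulling over http while pushing over
ssh. A short illustration with placeholder revision numbers and the paths from
the example config (any real values would do):

    hg diff -r 5663 -r 5664 docs/misc/hg-cheatsheet.txt > cheatsheet.patch
    hg pull    # fetches new changesets over http from the 'default' path
    hg push    # sends local changesets over ssh via 'default-push'
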
     2.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c	Mon Jul 04 15:31:47 2005 +0000
     2.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c	Mon Jul 04 15:35:35 2005 +0000
     2.3 @@ -910,31 +910,31 @@ void do_call_debug(struct pt_regs *regs)
     2.4  
     2.5  
     2.6  static trap_info_t trap_table[] = {
     2.7 -        {  0, 0, (__KERNEL_CS|0x3), 0, (unsigned long)divide_error               },
     2.8 -        {  1, 0, (__KERNEL_CS|0x3), 0, (unsigned long)debug                      },
     2.9 -        {  3, 3, (__KERNEL_CS|0x3), 0, (unsigned long)int3                       },
    2.10 -        {  4, 3, (__KERNEL_CS|0x3), 0, (unsigned long)overflow                   },
    2.11 -        {  5, 3, (__KERNEL_CS|0x3), 0, (unsigned long)bounds                     },
    2.12 -        {  6, 0, (__KERNEL_CS|0x3), 0, (unsigned long)invalid_op                 },
    2.13 -        {  7, 0, (__KERNEL_CS|0x3), 0, (unsigned long)device_not_available       },
    2.14 -        {  9, 0, (__KERNEL_CS|0x3), 0, (unsigned long)coprocessor_segment_overrun},
    2.15 -        { 10, 0, (__KERNEL_CS|0x3), 0, (unsigned long)invalid_TSS                },
    2.16 -        { 11, 0, (__KERNEL_CS|0x3), 0, (unsigned long)segment_not_present        },
    2.17 -        { 12, 0, (__KERNEL_CS|0x3), 0, (unsigned long)stack_segment              },
    2.18 -        { 13, 0, (__KERNEL_CS|0x3), 0, (unsigned long)general_protection         },
    2.19 -        { 14, 0, (__KERNEL_CS|0x3), 0, (unsigned long)page_fault                 },
    2.20 -        { 15, 0, (__KERNEL_CS|0x3), 0, (unsigned long)spurious_interrupt_bug     },
    2.21 -        { 16, 0, (__KERNEL_CS|0x3), 0, (unsigned long)coprocessor_error          },
    2.22 -        { 17, 0, (__KERNEL_CS|0x3), 0, (unsigned long)alignment_check            },
    2.23 +        {  0, 0, (__KERNEL_CS|0x3), (unsigned long)divide_error               },
    2.24 +        {  1, 0, (__KERNEL_CS|0x3), (unsigned long)debug                      },
    2.25 +        {  3, 3, (__KERNEL_CS|0x3), (unsigned long)int3                       },
    2.26 +        {  4, 3, (__KERNEL_CS|0x3), (unsigned long)overflow                   },
    2.27 +        {  5, 3, (__KERNEL_CS|0x3), (unsigned long)bounds                     },
    2.28 +        {  6, 0, (__KERNEL_CS|0x3), (unsigned long)invalid_op                 },
    2.29 +        {  7, 0, (__KERNEL_CS|0x3), (unsigned long)device_not_available       },
    2.30 +        {  9, 0, (__KERNEL_CS|0x3), (unsigned long)coprocessor_segment_overrun},
    2.31 +        { 10, 0, (__KERNEL_CS|0x3), (unsigned long)invalid_TSS                },
    2.32 +        { 11, 0, (__KERNEL_CS|0x3), (unsigned long)segment_not_present        },
    2.33 +        { 12, 0, (__KERNEL_CS|0x3), (unsigned long)stack_segment              },
    2.34 +        { 13, 0, (__KERNEL_CS|0x3), (unsigned long)general_protection         },
    2.35 +        { 14, 0, (__KERNEL_CS|0x3), (unsigned long)page_fault                 },
    2.36 +        { 15, 0, (__KERNEL_CS|0x3), (unsigned long)spurious_interrupt_bug     },
    2.37 +        { 16, 0, (__KERNEL_CS|0x3), (unsigned long)coprocessor_error          },
    2.38 +        { 17, 0, (__KERNEL_CS|0x3), (unsigned long)alignment_check            },
    2.39  #ifdef CONFIG_X86_MCE
    2.40 -        { 18, 0, (__KERNEL_CS|0x3), 0, (unsigned long)machine_check              },
    2.41 +        { 18, 0, (__KERNEL_CS|0x3), (unsigned long)machine_check              },
    2.42  #endif
    2.43 -        { 19, 0, (__KERNEL_CS|0x3), 0, (unsigned long)simd_coprocessor_error     },
    2.44 -        { SYSCALL_VECTOR,  3, (__KERNEL_CS|0x3), 0, (unsigned long)system_call   },
    2.45 +        { 19, 0, (__KERNEL_CS|0x3), (unsigned long)simd_coprocessor_error     },
    2.46 +        { SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), (unsigned long)system_call   },
    2.47  #ifdef CONFIG_IA32_EMULATION
    2.48 -	{ IA32_SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), 0, (unsigned long)ia32_syscall},
    2.49 +	{ IA32_SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), (unsigned long)ia32_syscall},
    2.50  #endif
    2.51 -        {  0, 0,           0, 0,  0                                              }
    2.52 +        {  0, 0,           0, 0                                              }
    2.53  };
    2.54  
    2.55  void __init trap_init(void)
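
The trap_table[] rewrite above drops the fifth initializer from every row,
leaving four values per entry. Assuming the trap_info_t layout of the Xen
public interface at the time (a sketch for orientation only, not taken from
this changeset), those four values line up as follows:

    /* Assumed layout of one virtual-IDT entry registered with Xen. */
    typedef struct trap_info {
        uint8_t       vector;   /* exception/interrupt vector number     */
        uint8_t       flags;    /* low bits: descriptor privilege level  */
        uint16_t      cs;       /* code selector for the handler         */
        unsigned long address;  /* handler entry point                   */
    } trap_info_t;
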
     3.1 --- a/xen/arch/x86/domain.c	Mon Jul 04 15:31:47 2005 +0000
     3.2 +++ b/xen/arch/x86/domain.c	Mon Jul 04 15:35:35 2005 +0000
     3.3 @@ -296,6 +296,8 @@ void arch_do_boot_vcpu(struct vcpu *v)
     3.4  }
     3.5  
     3.6  #ifdef CONFIG_VMX
     3.7 +static int vmx_switch_on;
     3.8 +
     3.9  void arch_vmx_do_resume(struct vcpu *v) 
    3.10  {
    3.11      u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
    3.12 @@ -364,6 +366,9 @@ static int vmx_final_setup_guest(
    3.13                             SHM_translate|SHM_external);
    3.14      }
    3.15  
    3.16 +    if (!vmx_switch_on)
    3.17 +        vmx_switch_on = 1;
    3.18 +
    3.19      return 0;
    3.20  
    3.21  out:
    3.22 @@ -441,9 +446,12 @@ int arch_set_info_guest(
    3.23      }
    3.24      else
    3.25      {
    3.26 -        if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
    3.27 -                                PGT_base_page_table) )
    3.28 -            return -EINVAL;
    3.29 +#ifdef __x86_64__
    3.30 +        if ( !(c->flags & VGCF_VMX_GUEST) )
    3.31 +#endif
    3.32 +            if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
    3.33 +                  PGT_base_page_table) )
    3.34 +                return -EINVAL;
    3.35      }
    3.36  
    3.37      if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    3.38 @@ -524,6 +532,12 @@ void toggle_guest_mode(struct vcpu *v)
    3.39          : "=r" (__r) : "r" (value), "0" (__r) );\
    3.40      __r; })
    3.41  
    3.42 +#if CONFIG_VMX
    3.43 +#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
    3.44 +#else
    3.45 +#define load_msrs(_p, _n)     ((void)0)
    3.46 +#endif 
    3.47 +
    3.48  static void load_segments(struct vcpu *p, struct vcpu *n)
    3.49  {
    3.50      struct vcpu_guest_context *pctxt = &p->arch.guest_context;
    3.51 @@ -681,6 +695,7 @@ long do_switch_to_user(void)
    3.52  #elif defined(__i386__)
    3.53  
    3.54  #define load_segments(_p, _n) ((void)0)
    3.55 +#define load_msrs(_p, _n)     ((void)0)
    3.56  #define save_segments(_p)     ((void)0)
    3.57  #define clear_segments()      ((void)0)
    3.58  
    3.59 @@ -780,6 +795,7 @@ void context_switch(struct vcpu *prev, s
    3.60          {
    3.61              load_LDT(next);
    3.62              load_segments(realprev, next);
    3.63 +            load_msrs(realprev, next);
    3.64          }
    3.65      }
    3.66  
     4.1 --- a/xen/arch/x86/vmx.c	Mon Jul 04 15:31:47 2005 +0000
     4.2 +++ b/xen/arch/x86/vmx.c	Mon Jul 04 15:35:35 2005 +0000
     4.3 @@ -46,6 +46,214 @@ int vmcs_size;
     4.4  unsigned int opt_vmx_debug_level = 0;
     4.5  integer_param("vmx_debug", opt_vmx_debug_level);
     4.6  
     4.7 +#ifdef __x86_64__
     4.8 +static struct msr_state percpu_msr[NR_CPUS];
     4.9 +
    4.10 +static u32 msr_data_index[VMX_MSR_COUNT] =
    4.11 +{
    4.12 +    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
    4.13 +    MSR_SYSCALL_MASK, MSR_EFER,
    4.14 +};
    4.15 +
    4.16 +/*
    4.17 + * To avoid MSR save/restore at every VM exit/entry time, we restore
     4.18 + * the x86_64 specific MSRs at domain switch time. Since those MSRs
     4.19 + * are not modified once set for generic domains, we don't save them,
    4.20 + * but simply reset them to the values set at percpu_traps_init().
    4.21 + */
    4.22 +void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
    4.23 +{
    4.24 +    struct msr_state *host_state;
    4.25 +    host_state = &percpu_msr[smp_processor_id()];
    4.26 +
    4.27 +    while (host_state->flags){
    4.28 +        int i;
    4.29 +
    4.30 +        i = find_first_set_bit(host_state->flags);
    4.31 +        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
    4.32 +        clear_bit(i, &host_state->flags);
    4.33 +    }
    4.34 +}
    4.35 +
    4.36 +static void vmx_save_init_msrs(void)
    4.37 +{
    4.38 +    struct msr_state *host_state;
    4.39 +    host_state = &percpu_msr[smp_processor_id()];
    4.40 +    int i;
    4.41 +
    4.42 +    for (i = 0; i < VMX_MSR_COUNT; i++)
    4.43 +        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
    4.44 +}
    4.45 +
    4.46 +#define CASE_READ_MSR(address)              \
    4.47 +    case MSR_ ## address:                 \
    4.48 +    msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
    4.49 +    break
    4.50 +
    4.51 +#define CASE_WRITE_MSR(address)   \
    4.52 +    case MSR_ ## address:                   \
    4.53 +    msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
    4.54 +    if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)){ \
    4.55 +    	set_bit(VMX_INDEX_MSR_ ## address, &msr->flags);   \
    4.56 +    }\
    4.57 +    break
    4.58 +
    4.59 +#define IS_CANO_ADDRESS(add) 1
    4.60 +static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
    4.61 +{
    4.62 +    u64     msr_content = 0;
    4.63 +    struct vcpu *vc = current;
    4.64 +    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
    4.65 +    switch(regs->ecx){
    4.66 +        case MSR_EFER:
    4.67 +            msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
    4.68 +            VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
    4.69 +            if (test_bit(VMX_CPU_STATE_LME_ENABLED,
    4.70 +                          &vc->arch.arch_vmx.cpu_state))
    4.71 +                msr_content |= 1 << _EFER_LME;
    4.72 +
    4.73 +            if (VMX_LONG_GUEST(vc))
    4.74 +                msr_content |= 1 << _EFER_LMA;
    4.75 +            break;
    4.76 +        case MSR_FS_BASE:
    4.77 +            if (!(VMX_LONG_GUEST(vc)))
    4.78 +                /* XXX should it be GP fault */
    4.79 +                domain_crash();
    4.80 +            __vmread(GUEST_FS_BASE, &msr_content);
    4.81 +            break;
    4.82 +        case MSR_GS_BASE:
    4.83 +            if (!(VMX_LONG_GUEST(vc)))
    4.84 +                domain_crash();
    4.85 +            __vmread(GUEST_GS_BASE, &msr_content);
    4.86 +            break;
    4.87 +        case MSR_SHADOW_GS_BASE:
    4.88 +            msr_content = msr->shadow_gs;
    4.89 +            break;
    4.90 +
    4.91 +        CASE_READ_MSR(STAR);
    4.92 +        CASE_READ_MSR(LSTAR);
    4.93 +        CASE_READ_MSR(CSTAR);
    4.94 +        CASE_READ_MSR(SYSCALL_MASK);
    4.95 +        default:
    4.96 +            return 0;
    4.97 +    }
    4.98 +    VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
    4.99 +    regs->eax = msr_content & 0xffffffff;
   4.100 +    regs->edx = msr_content >> 32;
   4.101 +    return 1;
   4.102 +}
   4.103 +
   4.104 +static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
   4.105 +{
   4.106 +    u64     msr_content = regs->eax | ((u64)regs->edx << 32); 
   4.107 +    struct vcpu *vc = current;
   4.108 +    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
   4.109 +    struct msr_state * host_state = 
   4.110 +		&percpu_msr[smp_processor_id()];
   4.111 +
   4.112 +    VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n", 
   4.113 +                regs->ecx, msr_content);
   4.114 +
   4.115 +    switch (regs->ecx){
   4.116 +        case MSR_EFER:
   4.117 +            if ((msr_content & EFER_LME) ^
   4.118 +                  test_bit(VMX_CPU_STATE_LME_ENABLED,
   4.119 +                           &vc->arch.arch_vmx.cpu_state)){
   4.120 +                if (test_bit(VMX_CPU_STATE_PG_ENABLED,
   4.121 +                             &vc->arch.arch_vmx.cpu_state) ||
   4.122 +                    !test_bit(VMX_CPU_STATE_PAE_ENABLED,
   4.123 +                        &vc->arch.arch_vmx.cpu_state)){
   4.124 +                     vmx_inject_exception(vc, TRAP_gp_fault, 0);
   4.125 +                }
   4.126 +            }
   4.127 +            if (msr_content & EFER_LME)
   4.128 +                set_bit(VMX_CPU_STATE_LME_ENABLED,
   4.129 +                        &vc->arch.arch_vmx.cpu_state);
    4.130 +            /* No update for LME/LMA since they have no effect */
   4.131 +            msr->msr_items[VMX_INDEX_MSR_EFER] =
   4.132 +                  msr_content;
   4.133 +            if (msr_content & ~(EFER_LME | EFER_LMA)){
   4.134 +                msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
   4.135 +                if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){ 
   4.136 +                    rdmsrl(MSR_EFER,
   4.137 +                            host_state->msr_items[VMX_INDEX_MSR_EFER]);
   4.138 +                      set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
   4.139 +                      set_bit(VMX_INDEX_MSR_EFER, &msr->flags);  
   4.140 +                      wrmsrl(MSR_EFER, msr_content);
   4.141 +                }
   4.142 +            }
   4.143 +            break;
   4.144 +
   4.145 +        case MSR_FS_BASE:
   4.146 +        case MSR_GS_BASE:
   4.147 +           if (!(VMX_LONG_GUEST(vc)))
   4.148 +                domain_crash();
   4.149 +           if (!IS_CANO_ADDRESS(msr_content)){
   4.150 +               VMX_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
   4.151 +               vmx_inject_exception(vc, TRAP_gp_fault, 0);
   4.152 +           }
   4.153 +           if (regs->ecx == MSR_FS_BASE)
   4.154 +               __vmwrite(GUEST_FS_BASE, msr_content);
   4.155 +           else 
   4.156 +               __vmwrite(GUEST_GS_BASE, msr_content);
   4.157 +           break;
   4.158 +
   4.159 +        case MSR_SHADOW_GS_BASE:
   4.160 +           if (!(VMX_LONG_GUEST(vc)))
   4.161 +               domain_crash();
   4.162 +           vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
   4.163 +           wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
   4.164 +           break;
   4.165 +
   4.166 +           CASE_WRITE_MSR(STAR);
   4.167 +           CASE_WRITE_MSR(LSTAR);
   4.168 +           CASE_WRITE_MSR(CSTAR);
   4.169 +           CASE_WRITE_MSR(SYSCALL_MASK);
   4.170 +        default:
   4.171 +            return 0;
   4.172 +    }
   4.173 +    return 1;
   4.174 +}
   4.175 +
   4.176 +void
   4.177 +vmx_restore_msrs(struct vcpu *d)
   4.178 +{
   4.179 +    int i = 0;
   4.180 +    struct msr_state *guest_state;
   4.181 +    struct msr_state *host_state;
   4.182 +    unsigned long guest_flags ;
   4.183 +
    4.184 +    guest_state = &d->arch.arch_vmx.msr_content;
   4.185 +    host_state = &percpu_msr[smp_processor_id()];
   4.186 +
   4.187 +    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
   4.188 +    guest_flags = guest_state->flags;
   4.189 +    if (!guest_flags)
   4.190 +        return;
   4.191 +
   4.192 +    while (guest_flags){
   4.193 +        i = find_first_set_bit(guest_flags);
   4.194 +
   4.195 +        VMX_DBG_LOG(DBG_LEVEL_2,
   4.196 +          "restore guest's index %d msr %lx with %lx\n",
   4.197 +          i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
   4.198 +        set_bit(i, &host_state->flags);
   4.199 +        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
   4.200 +        clear_bit(i, &guest_flags);
   4.201 +    }
   4.202 +}
   4.203 +
   4.204 +#else  /* __i386__ */
   4.205 +#define  vmx_save_init_msrs()   ((void)0)
   4.206 +
   4.207 +static inline int  long_mode_do_msr_read(struct cpu_user_regs *regs){
   4.208 +    return 0;
   4.209 +}
   4.210 +static inline int  long_mode_do_msr_write(struct cpu_user_regs *regs){
   4.211 +    return 0;
   4.212 +}
   4.213 +#endif
   4.214 +
   4.215  extern long evtchn_send(int lport);
   4.216  extern long do_block(void);
   4.217  void do_nmi(struct cpu_user_regs *, unsigned long);
   4.218 @@ -93,6 +301,8 @@ int start_vmx(void)
   4.219          printk("VMXON is done\n");
   4.220      }
   4.221  
   4.222 +    vmx_save_init_msrs();
   4.223 +
   4.224      return 1;
   4.225  }
   4.226  
   4.227 @@ -122,7 +332,6 @@ static void inline __update_guest_eip(un
   4.228  static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
   4.229  {
   4.230      unsigned long eip;
   4.231 -    l1_pgentry_t gpte;
   4.232      unsigned long gpa; /* FIXME: PAE */
   4.233      int result;
   4.234  
   4.235 @@ -139,13 +348,16 @@ static int vmx_do_page_fault(unsigned lo
   4.236          handle_mmio(va, va);
   4.237          return 1;
   4.238      }
   4.239 -    gpte = gva_to_gpte(va);
   4.240 -    if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) )
   4.241 -            return 0;
   4.242 -    gpa = l1e_get_paddr(gpte) + (va & ~PAGE_MASK);
   4.243 +    gpa = gva_to_gpa(va);
   4.244  
   4.245      /* Use 1:1 page table to identify MMIO address space */
   4.246 -    if (mmio_space(gpa)){
   4.247 +    if ( mmio_space(gpa) ){
   4.248 +        if (gpa >= 0xFEE00000) { /* workaround for local APIC */
   4.249 +            u32 inst_len;
   4.250 +            __vmread(INSTRUCTION_LEN, &(inst_len));
   4.251 +            __update_guest_eip(inst_len);
   4.252 +            return 1;
   4.253 +        }
   4.254          handle_mmio(va, gpa);
   4.255          return 1;
   4.256      }
   4.257 @@ -196,9 +408,11 @@ static void vmx_vmexit_do_cpuid(unsigned
   4.258      cpuid(input, &eax, &ebx, &ecx, &edx);
   4.259  
   4.260      if (input == 1) {
   4.261 +#ifdef __i386__
   4.262          clear_bit(X86_FEATURE_PSE, &edx);
   4.263          clear_bit(X86_FEATURE_PAE, &edx);
   4.264          clear_bit(X86_FEATURE_PSE36, &edx);
   4.265 +#endif
   4.266      }
   4.267  
   4.268      regs->eax = (unsigned long) eax;
   4.269 @@ -386,8 +600,6 @@ static void vmx_io_instruction(struct cp
   4.270           * selector is null.
   4.271           */
   4.272          if (!vm86 && check_for_null_selector(eip)) {
   4.273 -            printf("String I/O with null selector (cs:eip=0x%lx:0x%lx)\n",
   4.274 -                cs, eip);
   4.275              laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
   4.276          }
   4.277          p->pdata_valid = 1;
   4.278 @@ -709,10 +921,10 @@ error:
   4.279  static int vmx_set_cr0(unsigned long value)
   4.280  {
   4.281      struct vcpu *d = current;
   4.282 -    unsigned long old_base_mfn, mfn;
   4.283 +    unsigned long mfn;
   4.284      unsigned long eip;
   4.285      int paging_enabled;
   4.286 -
   4.287 +    unsigned long vm_entry_value;
   4.288      /* 
   4.289       * CR0: We don't want to lose PE and PG.
   4.290       */
   4.291 @@ -733,10 +945,42 @@ static int vmx_set_cr0(unsigned long val
   4.292              printk("Invalid CR3 value = %lx", d->arch.arch_vmx.cpu_cr3);
   4.293              domain_crash_synchronous(); /* need to take a clean path */
   4.294          }
   4.295 +
   4.296 +#if defined(__x86_64__)
   4.297 +        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
   4.298 +              &d->arch.arch_vmx.cpu_state) &&
   4.299 +          !test_bit(VMX_CPU_STATE_PAE_ENABLED,
   4.300 +              &d->arch.arch_vmx.cpu_state)){
   4.301 +            VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
   4.302 +            vmx_inject_exception(d, TRAP_gp_fault, 0);
   4.303 +        }
   4.304 +        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
   4.305 +              &d->arch.arch_vmx.cpu_state)){
    4.306 +            /* PAE should already be enabled at this point */
   4.307 +            VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
   4.308 +            set_bit(VMX_CPU_STATE_LMA_ENABLED,
   4.309 +              &d->arch.arch_vmx.cpu_state);
   4.310 +            __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
   4.311 +            vm_entry_value |= VM_ENTRY_CONTROLS_IA_32E_MODE;
   4.312 +            __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   4.313 +
   4.314 +        }
   4.315 +
   4.316 +	unsigned long crn;
   4.317 +        /* update CR4's PAE if needed */
   4.318 +        __vmread(GUEST_CR4, &crn);
   4.319 +        if ( (!(crn & X86_CR4_PAE)) &&
   4.320 +          test_bit(VMX_CPU_STATE_PAE_ENABLED,
   4.321 +              &d->arch.arch_vmx.cpu_state)){
   4.322 +            VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
   4.323 +            __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
   4.324 +        }
   4.325 +#elif defined( __i386__)
   4.326 +       	unsigned long old_base_mfn;
   4.327          old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
   4.328          if (old_base_mfn)
   4.329              put_page(pfn_to_page(old_base_mfn));
   4.330 -
   4.331 +#endif
   4.332          /*
   4.333           * Now arch.guest_table points to machine physical.
   4.334           */
   4.335 @@ -760,6 +1004,24 @@ static int vmx_set_cr0(unsigned long val
   4.336       * a partition disables the CR0.PE bit.
   4.337       */
   4.338      if ((value & X86_CR0_PE) == 0) {
   4.339 +        if ( value & X86_CR0_PG ) {
   4.340 +            /* inject GP here */
   4.341 +            vmx_inject_exception(d, TRAP_gp_fault, 0);
   4.342 +            return 0;
   4.343 +        } else {
   4.344 +            /* 
   4.345 +             * Disable paging here.
   4.346 +             * Same to PE == 1 && PG == 0
   4.347 +             */
   4.348 +            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
   4.349 +                         &d->arch.arch_vmx.cpu_state)){
   4.350 +                clear_bit(VMX_CPU_STATE_LMA_ENABLED,
   4.351 +                          &d->arch.arch_vmx.cpu_state);
   4.352 +                __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
   4.353 +                vm_entry_value &= ~VM_ENTRY_CONTROLS_IA_32E_MODE;
   4.354 +                __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   4.355 +            }
   4.356 +        }
   4.357  	__vmread(GUEST_RIP, &eip);
   4.358  	VMX_DBG_LOG(DBG_LEVEL_1,
   4.359  	    "Disabling CR0.PE at %%eip 0x%lx\n", eip);
   4.360 @@ -791,6 +1053,26 @@ static int vmx_set_cr0(unsigned long val
   4.361  #define CASE_GET_REG(REG, reg)  \
   4.362      case REG_ ## REG: value = regs->reg; break
   4.363  
   4.364 +#define CASE_EXTEND_SET_REG \
   4.365 +      CASE_EXTEND_REG(S)
   4.366 +#define CASE_EXTEND_GET_REG \
   4.367 +      CASE_EXTEND_REG(G)
   4.368 +
   4.369 +#ifdef __i386__
   4.370 +#define CASE_EXTEND_REG(T)
   4.371 +#else
   4.372 +#define CASE_EXTEND_REG(T)    \
   4.373 +    CASE_ ## T ## ET_REG(R8, r8); \
   4.374 +    CASE_ ## T ## ET_REG(R9, r9); \
   4.375 +    CASE_ ## T ## ET_REG(R10, r10); \
   4.376 +    CASE_ ## T ## ET_REG(R11, r11); \
   4.377 +    CASE_ ## T ## ET_REG(R12, r12); \
   4.378 +    CASE_ ## T ## ET_REG(R13, r13); \
   4.379 +    CASE_ ## T ## ET_REG(R14, r14); \
   4.380 +    CASE_ ## T ## ET_REG(R15, r15);
   4.381 +#endif
   4.382 +
   4.383 +
   4.384  /*
   4.385   * Write to control registers
   4.386   */
   4.387 @@ -808,6 +1090,7 @@ static int mov_to_cr(int gp, int cr, str
   4.388          CASE_GET_REG(EBP, ebp);
   4.389          CASE_GET_REG(ESI, esi);
   4.390          CASE_GET_REG(EDI, edi);
   4.391 +        CASE_EXTEND_GET_REG
   4.392      case REG_ESP:
   4.393          __vmread(GUEST_RSP, &value);
   4.394          break;
   4.395 @@ -878,12 +1161,30 @@ static int mov_to_cr(int gp, int cr, str
   4.396          break;
   4.397      }
   4.398      case 4:         
   4.399 +    {
   4.400          /* CR4 */
   4.401 -        if (value & X86_CR4_PAE)
   4.402 -            __vmx_bug(regs);    /* not implemented */
   4.403 +        unsigned long old_guest_cr;
   4.404 +        unsigned long pae_disabled = 0;
   4.405 +
   4.406 +        __vmread(GUEST_CR4, &old_guest_cr);
   4.407 +        if (value & X86_CR4_PAE){
   4.408 +            set_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state);
   4.409 +            if(!vmx_paging_enabled(d))
   4.410 +                pae_disabled = 1;
   4.411 +        } else {
   4.412 +            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
   4.413 +                         &d->arch.arch_vmx.cpu_state)){
   4.414 +                vmx_inject_exception(d, TRAP_gp_fault, 0);
   4.415 +            }
   4.416 +            clear_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state);
   4.417 +        }
   4.418 +
   4.419          __vmread(CR4_READ_SHADOW, &old_cr);
   4.420 -        
   4.421 -        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
   4.422 +        if (pae_disabled)
   4.423 +            __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
   4.424 +        else
   4.425 +            __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
   4.426 +
   4.427          __vmwrite(CR4_READ_SHADOW, value);
   4.428  
   4.429          /*
   4.430 @@ -891,10 +1192,10 @@ static int mov_to_cr(int gp, int cr, str
   4.431           * all TLB entries except global entries.
   4.432           */
   4.433          if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
   4.434 -            vmx_shadow_clear_state(d->domain);
   4.435              shadow_sync_all(d->domain);
   4.436          }
   4.437          break;
   4.438 +    }
   4.439      default:
   4.440          printk("invalid cr: %d\n", gp);
   4.441          __vmx_bug(regs);
   4.442 @@ -1000,7 +1301,9 @@ static inline void vmx_do_msr_read(struc
   4.443              regs->edx = 0;
   4.444              break;
   4.445          default:
   4.446 -            rdmsr(regs->ecx, regs->eax, regs->edx);
   4.447 +            if(long_mode_do_msr_read(regs))
   4.448 +                return;
   4.449 +            rdmsr_user(regs->ecx, regs->eax, regs->edx);
   4.450              break;
   4.451      }
   4.452  
   4.453 @@ -1026,6 +1329,7 @@ static inline void vmx_do_msr_write(stru
   4.454              __vmwrite(GUEST_SYSENTER_EIP, regs->eax);
   4.455              break;
   4.456          default:
   4.457 +            long_mode_do_msr_write(regs);
   4.458              break;
   4.459      }
   4.460  
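
The comment at the top of the vmx.c hunk describes the scheme: the x86_64
syscall-related MSRs (LSTAR, STAR, CSTAR, SYSCALL_MASK, EFER) are switched
lazily at domain-switch time rather than on every VM exit/entry, tracked by a
per-CPU bitmap recording which MSRs currently hold guest values. A condensed
sketch of that pattern, reusing struct msr_state, msr_data_index[] and the
bit helpers from the patch (the function names here are illustrative):

    /* Entering a VMX guest: install only the MSRs the guest has actually
     * written, and note which host values will need restoring later. */
    static void restore_guest_msrs(struct msr_state *guest,
                                   struct msr_state *host)
    {
        unsigned long flags = guest->flags;

        while ( flags )
        {
            int i = find_first_set_bit(flags);
            set_bit(i, &host->flags);            /* host value now stale */
            wrmsrl(msr_data_index[i], guest->msr_items[i]);
            clear_bit(i, &flags);
        }
    }

    /* Switching away again: put back exactly those MSRs, using the host
     * values captured once by vmx_save_init_msrs(). */
    static void restore_host_msrs(struct msr_state *host)
    {
        while ( host->flags )
        {
            int i = find_first_set_bit(host->flags);
            wrmsrl(msr_data_index[i], host->msr_items[i]);
            clear_bit(i, &host->flags);
        }
    }
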
     5.1 --- a/xen/arch/x86/vmx_platform.c	Mon Jul 04 15:31:47 2005 +0000
     5.2 +++ b/xen/arch/x86/vmx_platform.c	Mon Jul 04 15:35:35 2005 +0000
     5.3 @@ -503,7 +503,7 @@ static int vmx_decode(const unsigned cha
     5.4  
     5.5  int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
     5.6  {
     5.7 -    l1_pgentry_t gpte;
     5.8 +    unsigned long gpa;
     5.9      unsigned long mfn;
    5.10      unsigned char *inst_start;
    5.11      int remaining = 0;
    5.12 @@ -513,8 +513,9 @@ int inst_copy_from_guest(unsigned char *
    5.13  
    5.14      if ( vmx_paging_enabled(current) )
    5.15      {
    5.16 -        gpte = gva_to_gpte(guest_eip);
    5.17 -        mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
    5.18 +        gpa = gva_to_gpa(guest_eip);
    5.19 +        mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT);
    5.20 +
    5.21          /* Does this cross a page boundary ? */
    5.22          if ( (guest_eip & PAGE_MASK) != ((guest_eip + inst_len) & PAGE_MASK) )
    5.23          {
    5.24 @@ -533,8 +534,9 @@ int inst_copy_from_guest(unsigned char *
    5.25  
    5.26      if ( remaining )
    5.27      {
    5.28 -        gpte = gva_to_gpte(guest_eip+inst_len+remaining);
    5.29 -        mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
    5.30 +        gpa = gva_to_gpa(guest_eip+inst_len+remaining);
    5.31 +        mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT);
    5.32 +
    5.33          inst_start = map_domain_page(mfn);
    5.34          memcpy((char *)buf+inst_len, inst_start, remaining);
    5.35          unmap_domain_page(inst_start);
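
In the instruction-copy path above, the old per-PTE lookup is replaced by a
two-step translation: guest-virtual address to guest-physical address, then
guest page frame to machine frame, before the frame is transiently mapped
into Xen. A compressed sketch of the sequence using the same helpers (error
handling and the page-crossing case omitted):

    unsigned long gpa = gva_to_gpa(guest_eip);                      /* GVA -> guest-physical        */
    unsigned long mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT); /* guest frame -> machine frame */
    unsigned char *p  = map_domain_page(mfn);                       /* transient hypervisor mapping */
    memcpy(buf, p + (guest_eip & ~PAGE_MASK), inst_len);            /* copy within a single page    */
    unmap_domain_page(p);
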
     6.1 --- a/xen/arch/x86/vmx_vmcs.c	Mon Jul 04 15:31:47 2005 +0000
     6.2 +++ b/xen/arch/x86/vmx_vmcs.c	Mon Jul 04 15:35:35 2005 +0000
     6.3 @@ -65,8 +65,12 @@ static inline int construct_vmcs_control
     6.4  
     6.5      error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, 
     6.6                         MONITOR_CPU_BASED_EXEC_CONTROLS);
     6.7 -
     6.8 +#if defined (__x86_64__)
     6.9 +    error |= __vmwrite(VM_EXIT_CONTROLS, 
    6.10 +      MONITOR_VM_EXIT_CONTROLS | VM_EXIT_CONTROLS_IA_32E_MODE);
    6.11 +#else
    6.12      error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
    6.13 +#endif
    6.14      error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
    6.15  
    6.16      return error;
    6.17 @@ -93,6 +97,11 @@ struct host_execution_env {
    6.18      unsigned long tr_base;
    6.19      unsigned long ds_base;
    6.20      unsigned long cs_base;
    6.21 +#ifdef __x86_64__ 
    6.22 +    unsigned long fs_base; 
    6.23 +    unsigned long gs_base; 
    6.24 +#endif 
    6.25 +
    6.26      /* control registers */
    6.27      unsigned long cr3;
    6.28      unsigned long cr0;
    6.29 @@ -230,8 +239,8 @@ construct_init_vmcs_guest(struct cpu_use
    6.30      /* interrupt */
    6.31      error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    6.32      /* mask */
    6.33 -    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
    6.34 -    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);
    6.35 +    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
    6.36 +    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);
    6.37  
    6.38      error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    6.39      error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
    6.40 @@ -298,9 +307,19 @@ construct_init_vmcs_guest(struct cpu_use
    6.41      shadow_cr &= ~X86_CR0_PG;
    6.42      error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    6.43      /* CR3 is set in vmx_final_setup_guest */
    6.44 +#ifdef __x86_64__
    6.45 +    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
    6.46 +    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
    6.47 +#else
    6.48      error |= __vmwrite(GUEST_CR4, host_env->cr4);
    6.49 +#endif
    6.50      shadow_cr = host_env->cr4;
    6.51 +
    6.52 +#ifdef __x86_64__
    6.53 +    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
    6.54 +#else
    6.55      shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    6.56 +#endif
    6.57      error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
    6.58  
    6.59      error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    6.60 @@ -339,16 +358,24 @@ static inline int construct_vmcs_host(st
    6.61      error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    6.62      error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    6.63      error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
    6.64 +#if defined (__i386__)
    6.65      error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    6.66      error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
    6.67 +    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base); 
    6.68 +    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base); 
    6.69  
    6.70 +#else
    6.71 +    rdmsrl(MSR_FS_BASE, host_env->fs_base); 
    6.72 +    rdmsrl(MSR_GS_BASE, host_env->gs_base); 
    6.73 +    error |= __vmwrite(HOST_FS_BASE, host_env->fs_base); 
    6.74 +    error |= __vmwrite(HOST_GS_BASE, host_env->gs_base); 
    6.75 +
    6.76 +#endif
    6.77      host_env->cs_selector = __HYPERVISOR_CS;
    6.78      error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);
    6.79  
    6.80      host_env->ds_base = 0;
    6.81      host_env->cs_base = 0;
    6.82 -    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    6.83 -    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);
    6.84  
    6.85  /* Debug */
    6.86      __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
    6.87 @@ -366,6 +393,12 @@ static inline int construct_vmcs_host(st
    6.88      host_env->cr4 = crn;
    6.89      error |= __vmwrite(HOST_CR4, crn);
    6.90      error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
    6.91 +#ifdef __x86_64__ 
    6.92 +    /* TBD: support cr8 for 64-bit guest */ 
    6.93 +    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0); 
    6.94 +    __vmwrite(TPR_THRESHOLD, 0); 
    6.95 +    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); 
    6.96 +#endif 
    6.97  
    6.98      return error;
    6.99  }
     7.1 --- a/xen/include/asm-x86/msr.h	Mon Jul 04 15:31:47 2005 +0000
     7.2 +++ b/xen/include/asm-x86/msr.h	Mon Jul 04 15:35:35 2005 +0000
     7.3 @@ -18,6 +18,8 @@
     7.4  			  : /* no outputs */ \
     7.5  			  : "c" (msr), "a" (val1), "d" (val2))
     7.6  
     7.7 +#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) 
     7.8 +
     7.9  #define rdmsr_user(msr,val1,val2) ({\
    7.10      int _rc; \
    7.11      __asm__ __volatile__( \
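
The new wrmsrl() wrapper above simply splits a 64-bit value into the low/high
32-bit halves that the existing wrmsr() macro loads into EAX and EDX. For
illustration, with an arbitrary value:

    /* These two calls are equivalent under the new macro: */
    wrmsrl(MSR_STAR, 0x0000001234abcd00ULL);
    wrmsr (MSR_STAR, 0x34abcd00, 0x00000012);   /* low half, high half */
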
     8.1 --- a/xen/include/asm-x86/vmx_intercept.h	Mon Jul 04 15:31:47 2005 +0000
     8.2 +++ b/xen/include/asm-x86/vmx_intercept.h	Mon Jul 04 15:35:35 2005 +0000
     8.3 @@ -8,7 +8,7 @@
     8.4  #include <xen/errno.h>
     8.5  #include <public/io/ioreq.h>
     8.6  
     8.7 -#define MAX_IO_HANDLER 6
     8.8 +#define MAX_IO_HANDLER 10
     8.9  
    8.10  typedef int (*intercept_action_t)(ioreq_t*);
    8.11  
     9.1 --- a/xen/include/asm-x86/vmx_vmcs.h	Mon Jul 04 15:31:47 2005 +0000
     9.2 +++ b/xen/include/asm-x86/vmx_vmcs.h	Mon Jul 04 15:35:35 2005 +0000
     9.3 @@ -27,6 +27,14 @@
     9.4  extern int start_vmx(void);
     9.5  extern void stop_vmx(void);
     9.6  
     9.7 +#if defined (__x86_64__)
     9.8 +extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
     9.9 +void vmx_restore_msrs(struct vcpu *d);
    9.10 +#else
    9.11 +#define vmx_load_msrs(_p, _n)      ((void)0)
    9.12 +#define vmx_restore_msrs(_v)       ((void)0)
    9.13 +#endif
    9.14 +
    9.15  void vmx_enter_scheduler(void);
    9.16  
    9.17  enum {
    9.18 @@ -87,7 +95,6 @@ struct vmcs_struct *alloc_vmcs(void);
    9.19  void free_vmcs(struct vmcs_struct *);
    9.20  int  load_vmcs(struct arch_vmx_struct *, u64);
    9.21  int  store_vmcs(struct arch_vmx_struct *, u64);
    9.22 -void dump_vmcs(void);
    9.23  int  construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *, 
    9.24                      struct vcpu_guest_context *, int);
    9.25