ia64/xen-unstable

changeset 16017:f71b7d6ad5d8

merge with xen-unstable.hg
author:   Alex Williamson <alex.williamson@hp.com>
date:     Mon Oct 01 13:59:37 2007 -0600 (2007-10-01)
parents:  d3665dc74a41 5c7afb32df99
children: 9fbbba4c23fb
     1.1 --- a/tools/python/xen/xm/main.py	Mon Oct 01 09:59:24 2007 -0600
     1.2 +++ b/tools/python/xen/xm/main.py	Mon Oct 01 13:59:37 2007 -0600
     1.3 @@ -55,6 +55,9 @@ from xen.util.acmpolicy import ACM_LABEL
     1.4  
     1.5  import XenAPI
     1.6  
     1.7 +import inspect
     1.8 +from xen.xend import XendOptions
     1.9 +xoptions = XendOptions.instance()
    1.10  
    1.11  # getopt.gnu_getopt is better, but only exists in Python 2.3+.  Use
    1.12  # getopt.getopt if gnu_getopt is not available.  This will mean that options
    1.13 @@ -1595,7 +1598,31 @@ def xm_sched_credit(args):
    1.14                  err(str(result))
    1.15  
    1.16  def xm_info(args):
    1.17 -    arg_check(args, "info", 0)
    1.18 +    arg_check(args, "info", 0, 1)
    1.19 +    
    1.20 +    try:
    1.21 +        (options, params) = getopt.gnu_getopt(args, 'c', ['config'])
    1.22 +    except getopt.GetoptError, opterr:
    1.23 +        err(opterr)
    1.24 +        usage('info')
    1.25 +    
    1.26 +    show_xend_config = 0
    1.27 +    for (k, v) in options:
    1.28 +        if k in ['-c', '--config']:
    1.29 +            show_xend_config = 1
    1.30 +
    1.31 +    if show_xend_config:
    1.32 +        for name, obj in inspect.getmembers(xoptions):
    1.33 +            if not inspect.ismethod(obj):
    1.34 +                if name == "config":
    1.35 +                    for x in obj[1:]:
    1.36 +                        if len(x) < 2: 
    1.37 +                            print "%-38s: (none)" % x[0]
    1.38 +                        else: 
    1.39 +                            print "%-38s:" % x[0], x[1]
    1.40 +                else:
    1.41 +                    print "%-38s:" % name, obj
    1.42 +        return
    1.43  
    1.44      if serverType == SERVER_XEN_API:
    1.45  
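
For context on the hunk above: the new -c/--config flag walks the XendOptions
singleton with inspect.getmembers() and pretty-prints each non-method
attribute, expanding the parsed xend-config.sxp list stored under "config".
A minimal standalone sketch of that idiom, with a hypothetical Options class
standing in for XendOptions so it runs outside a xend install:

    import inspect

    class Options(object):                  # hypothetical stand-in for XendOptions
        loglevel = 'DEBUG'
        config = [['xend'], ['xend-port', 8000], ['dom0-cpus']]

    opts = Options()
    for name, obj in inspect.getmembers(opts):
        if inspect.ismethod(obj) or name.startswith('_'):
            continue                        # this sketch also skips dunders for brevity
        if name == 'config':
            for entry in obj[1:]:           # first element is the 'xend' tag
                if len(entry) < 2:
                    print "%-38s: (none)" % entry[0]
                else:
                    print "%-38s:" % entry[0], entry[1]
        else:
            print "%-38s:" % name, obj
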
     2.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Oct 01 09:59:24 2007 -0600
     2.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Oct 01 13:59:37 2007 -0600
     2.3 @@ -49,6 +49,10 @@
     2.4  #include <public/version.h>
     2.5  #include <public/memory.h>
     2.6  
     2.7 +/* Xen command-line option to disable hardware-assisted paging */
     2.8 +static int opt_hap_disabled;
     2.9 +invbool_param("hap", opt_hap_disabled);
    2.10 +
    2.11  int hvm_enabled __read_mostly;
    2.12  
    2.13  unsigned int opt_hvm_debug_level __read_mostly;
    2.14 @@ -74,6 +78,14 @@ void hvm_enable(struct hvm_function_tabl
    2.15  
    2.16      hvm_funcs   = *fns;
    2.17      hvm_enabled = 1;
    2.18 +
    2.19 +    if ( hvm_funcs.hap_supported )
    2.20 +    {
    2.21 +        if ( opt_hap_disabled )
    2.22 +            hvm_funcs.hap_supported = 0;
    2.23 +        printk("HVM: Hardware Assisted Paging %sabled\n",
    2.24 +               hvm_funcs.hap_supported ? "en" : "dis");
    2.25 +    }
    2.26  }
    2.27  
    2.28  void hvm_set_guest_time(struct vcpu *v, u64 gtime)
    2.29 @@ -325,6 +337,34 @@ static int hvm_load_cpu_ctxt(struct doma
    2.30      if ( hvm_load_entry(CPU, h, &ctxt) != 0 ) 
    2.31          return -EINVAL;
    2.32  
    2.33 +    /* Sanity check some control registers. */
    2.34 +    if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
    2.35 +         !(ctxt.cr0 & X86_CR0_ET) ||
    2.36 +         ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
    2.37 +    {
    2.38 +        gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
     2.39 +                 ctxt.cr0);
    2.40 +        return -EINVAL;
    2.41 +    }
    2.42 +
    2.43 +    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
    2.44 +    {
    2.45 +        gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
     2.46 +                 ctxt.cr4);
    2.47 +        return -EINVAL;
    2.48 +    }
    2.49 +
    2.50 +    if ( (ctxt.msr_efer & ~(EFER_LME | EFER_NX | EFER_SCE)) ||
    2.51 +         ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
    2.52 +         (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
    2.53 +         (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
    2.54 +         ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
    2.55 +    {
    2.56 +        gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
    2.57 +                 ctxt.msr_efer);
    2.58 +        return -EINVAL;
    2.59 +    }
    2.60 +
    2.61      /* Architecture-specific vmcs/vmcb bits */
    2.62      if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
    2.63          return -EINVAL;
    2.64 @@ -520,6 +560,39 @@ void hvm_triple_fault(void)
    2.65      domain_shutdown(v->domain, SHUTDOWN_reboot);
    2.66  }
    2.67  
    2.68 +int hvm_set_efer(uint64_t value)
    2.69 +{
    2.70 +    struct vcpu *v = current;
    2.71 +
    2.72 +    value &= ~EFER_LMA;
    2.73 +
    2.74 +    if ( (value & ~(EFER_LME | EFER_NX | EFER_SCE)) ||
    2.75 +         ((sizeof(long) != 8) && (value & EFER_LME)) ||
    2.76 +         (!cpu_has_nx && (value & EFER_NX)) ||
    2.77 +         (!cpu_has_syscall && (value & EFER_SCE)) )
    2.78 +    {
    2.79 +        gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    2.80 +                 "EFER: %"PRIx64"\n", value);
    2.81 +        hvm_inject_exception(TRAP_gp_fault, 0, 0);
    2.82 +        return 0;
    2.83 +    }
    2.84 +
    2.85 +    if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
    2.86 +         hvm_paging_enabled(v) )
    2.87 +    {
    2.88 +        gdprintk(XENLOG_WARNING,
    2.89 +                 "Trying to change EFER.LME with paging enabled\n");
    2.90 +        hvm_inject_exception(TRAP_gp_fault, 0, 0);
    2.91 +        return 0;
    2.92 +    }
    2.93 +
    2.94 +    value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
    2.95 +    v->arch.hvm_vcpu.guest_efer = value;
    2.96 +    hvm_update_guest_efer(v);
    2.97 +
    2.98 +    return 1;
    2.99 +}
   2.100 +
   2.101  int hvm_set_cr0(unsigned long value)
   2.102  {
   2.103      struct vcpu *v = current;
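
A usage note on the first hunk above: since the option is registered with
invbool_param(), hardware-assisted paging is now on by default whenever
hvm_funcs.hap_supported is set (previously it had to be enabled explicitly via
boolean_param("hap", ...) in paging.c), and the boot line disables it instead.
A hypothetical GRUB stanza turning HAP off, with purely illustrative paths:

    title Xen (HAP disabled)
        kernel /boot/xen.gz hap=0 console=vga
        module /boot/vmlinuz-xen root=/dev/sda1 ro
        module /boot/initrd-xen.img

The remainder of the hunk folds the EFER reserved-bit and LME-transition
checks into a single hvm_set_efer() helper; the SVM and VMX wrmsr paths below
now call it and report HNDL_exception_raised on its zero return (the helper
injects the #GP itself) instead of open-coding the checks.
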
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Oct 01 09:59:24 2007 -0600
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Oct 01 13:59:37 2007 -0600
     3.3 @@ -69,8 +69,7 @@ static void *hsa[NR_CPUS] __read_mostly;
     3.4  /* vmcb used for extended host state */
     3.5  static void *root_vmcb[NR_CPUS] __read_mostly;
     3.6  
     3.7 -/* hardware assisted paging bits */
     3.8 -extern int opt_hap_enabled;
     3.9 +static void svm_update_guest_efer(struct vcpu *v);
    3.10  
    3.11  static void inline __update_guest_eip(
    3.12      struct cpu_user_regs *regs, int inst_len) 
    3.13 @@ -106,22 +105,10 @@ static void svm_cpu_down(void)
    3.14      write_efer(read_efer() & ~EFER_SVME);
    3.15  }
    3.16  
    3.17 -static int svm_lme_is_set(struct vcpu *v)
    3.18 -{
    3.19 -#ifdef __x86_64__
    3.20 -    u64 guest_efer = v->arch.hvm_vcpu.guest_efer;
    3.21 -    return guest_efer & EFER_LME;
    3.22 -#else
    3.23 -    return 0;
    3.24 -#endif
    3.25 -}
    3.26 -
    3.27  static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
    3.28  {
    3.29      u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
    3.30      u32 ecx = regs->ecx;
    3.31 -    struct vcpu *v = current;
    3.32 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    3.33  
    3.34      HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
    3.35                  ecx, msr_content);
    3.36 @@ -129,47 +116,8 @@ static enum handler_return long_mode_do_
    3.37      switch ( ecx )
    3.38      {
    3.39      case MSR_EFER:
    3.40 -        /* Offending reserved bit will cause #GP. */
    3.41 -#ifdef __x86_64__
    3.42 -        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
    3.43 -#else
    3.44 -        if ( (msr_content & ~(EFER_NX | EFER_SCE)) ||
    3.45 -#endif
    3.46 -             (!cpu_has_nx && (msr_content & EFER_NX)) ||
    3.47 -             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
    3.48 -        {
    3.49 -            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    3.50 -                     "EFER: %"PRIx64"\n", msr_content);
    3.51 -            goto gp_fault;
    3.52 -        }
    3.53 -
    3.54 -        if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
    3.55 -        {
    3.56 -            /* EFER.LME transition from 0 to 1. */
    3.57 -            if ( hvm_paging_enabled(v) ||
    3.58 -                 !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
    3.59 -            {
    3.60 -                gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
    3.61 -                         "in paging mode or PAE bit is not set\n");
    3.62 -                goto gp_fault;
    3.63 -            }
    3.64 -        }
    3.65 -        else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
    3.66 -        {
    3.67 -            /* EFER.LME transistion from 1 to 0. */
    3.68 -            if ( hvm_paging_enabled(v) )
    3.69 -            {
    3.70 -                gdprintk(XENLOG_WARNING, 
    3.71 -                         "Trying to clear EFER.LME while paging enabled\n");
    3.72 -                goto gp_fault;
    3.73 -            }
    3.74 -        }
    3.75 -
    3.76 -        v->arch.hvm_vcpu.guest_efer = msr_content;
    3.77 -        vmcb->efer = msr_content | EFER_SVME;
    3.78 -        if ( !hvm_paging_enabled(v) )
    3.79 -            vmcb->efer &= ~(EFER_LME | EFER_LMA);
    3.80 -
    3.81 +        if ( !hvm_set_efer(msr_content) )
    3.82 +            return HNDL_exception_raised;
    3.83          break;
    3.84  
    3.85      case MSR_K8_MC4_MISC: /* Threshold register */
    3.86 @@ -185,10 +133,6 @@ static enum handler_return long_mode_do_
    3.87      }
    3.88  
    3.89      return HNDL_done;
    3.90 -
    3.91 - gp_fault:
    3.92 -    svm_inject_exception(v, TRAP_gp_fault, 1, 0);
    3.93 -    return HNDL_exception_raised;
    3.94  }
    3.95  
    3.96  
    3.97 @@ -452,11 +396,7 @@ static void svm_load_cpu_state(struct vc
    3.98      vmcb->cstar      = data->msr_cstar;
    3.99      vmcb->sfmask     = data->msr_syscall_mask;
   3.100      v->arch.hvm_vcpu.guest_efer = data->msr_efer;
   3.101 -    vmcb->efer       = data->msr_efer | EFER_SVME;
   3.102 -    /* VMCB's EFER.LME isn't set unless we're actually in long mode
   3.103 -     * (see long_mode_do_msr_write()) */
   3.104 -    if ( !(vmcb->efer & EFER_LMA) )
   3.105 -        vmcb->efer &= ~EFER_LME;
   3.106 +    svm_update_guest_efer(v);
   3.107  
   3.108      hvm_set_guest_time(v, data->tsc);
   3.109  }
   3.110 @@ -546,14 +486,11 @@ static void svm_update_guest_cr(struct v
   3.111  
   3.112  static void svm_update_guest_efer(struct vcpu *v)
   3.113  {
   3.114 -#ifdef __x86_64__
   3.115      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.116  
   3.117 -    if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
   3.118 -        vmcb->efer |= EFER_LME | EFER_LMA;
   3.119 -    else
   3.120 -        vmcb->efer &= ~(EFER_LME | EFER_LMA);
   3.121 -#endif
   3.122 +    vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
   3.123 +    if ( vmcb->efer & EFER_LMA )
   3.124 +        vmcb->efer |= EFER_LME;
   3.125  }
   3.126  
   3.127  static void svm_flush_guest_tlbs(void)
   3.128 @@ -936,18 +873,14 @@ static struct hvm_function_table svm_fun
   3.129      .event_pending        = svm_event_pending
   3.130  };
   3.131  
   3.132 -static void svm_npt_detect(void)
   3.133 +static int svm_npt_detect(void)
   3.134  {
   3.135      u32 eax, ebx, ecx, edx;
   3.136  
   3.137      /* Check CPUID for nested paging support. */
   3.138      cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
   3.139  
   3.140 -    if ( !(edx & 1) && opt_hap_enabled )
   3.141 -    {
   3.142 -        printk("SVM: Nested paging is not supported by this CPU.\n");
   3.143 -        opt_hap_enabled = 0;
   3.144 -    }
   3.145 +    return (edx & 1);
   3.146  }
   3.147  
   3.148  int start_svm(struct cpuinfo_x86 *c)
   3.149 @@ -978,8 +911,6 @@ int start_svm(struct cpuinfo_x86 *c)
   3.150  
   3.151      write_efer(read_efer() | EFER_SVME);
   3.152  
   3.153 -    svm_npt_detect();
   3.154 -
   3.155      /* Initialize the HSA for this core. */
   3.156      phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
   3.157      phys_hsa_lo = (u32) phys_hsa;
   3.158 @@ -994,11 +925,10 @@ int start_svm(struct cpuinfo_x86 *c)
   3.159  
   3.160      setup_vmcb_dump();
   3.161  
   3.162 +    svm_function_table.hap_supported = svm_npt_detect();
   3.163 +
   3.164      hvm_enable(&svm_function_table);
   3.165  
   3.166 -    if ( opt_hap_enabled )
   3.167 -        printk("SVM: Nested paging enabled.\n");
   3.168 -        
   3.169      return 1;
   3.170  }
   3.171  
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Oct 01 09:59:24 2007 -0600
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Oct 01 13:59:37 2007 -0600
     4.3 @@ -199,42 +199,8 @@ static enum handler_return long_mode_do_
     4.4      switch ( ecx )
     4.5      {
     4.6      case MSR_EFER:
     4.7 -        /* offending reserved bit will cause #GP */
     4.8 -        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
     4.9 -             (!cpu_has_nx && (msr_content & EFER_NX)) ||
    4.10 -             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
    4.11 -        {
    4.12 -            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    4.13 -                     "EFER: %"PRIx64"\n", msr_content);
    4.14 -            goto gp_fault;
    4.15 -        }
    4.16 -
    4.17 -        if ( (msr_content & EFER_LME)
    4.18 -             &&  !(v->arch.hvm_vcpu.guest_efer & EFER_LME) )
    4.19 -        {
    4.20 -            if ( unlikely(hvm_paging_enabled(v)) )
    4.21 -            {
    4.22 -                gdprintk(XENLOG_WARNING,
    4.23 -                         "Trying to set EFER.LME with paging enabled\n");
    4.24 -                goto gp_fault;
    4.25 -            }
    4.26 -        }
    4.27 -        else if ( !(msr_content & EFER_LME)
    4.28 -                  && (v->arch.hvm_vcpu.guest_efer & EFER_LME) )
    4.29 -        {
    4.30 -            if ( unlikely(hvm_paging_enabled(v)) )
    4.31 -            {
    4.32 -                gdprintk(XENLOG_WARNING,
    4.33 -                         "Trying to clear EFER.LME with paging enabled\n");
    4.34 -                goto gp_fault;
    4.35 -            }
    4.36 -        }
    4.37 -
    4.38 -        if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & (EFER_NX|EFER_SCE) )
    4.39 -            write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
    4.40 -                       (msr_content & (EFER_NX|EFER_SCE)));
    4.41 -
    4.42 -        v->arch.hvm_vcpu.guest_efer = msr_content;
    4.43 +        if ( !hvm_set_efer(msr_content) )
    4.44 +            goto exception_raised;
    4.45          break;
    4.46  
    4.47      case MSR_FS_BASE:
    4.48 @@ -285,6 +251,7 @@ static enum handler_return long_mode_do_
    4.49      HVM_DBG_LOG(DBG_LEVEL_0, "Not cano address of msr write %x", ecx);
    4.50   gp_fault:
    4.51      vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
    4.52 + exception_raised:
    4.53      return HNDL_exception_raised;
    4.54  }
    4.55  
    4.56 @@ -380,7 +347,8 @@ static enum handler_return long_mode_do_
    4.57      u64 msr_content = 0;
    4.58      struct vcpu *v = current;
    4.59  
    4.60 -    switch ( regs->ecx ) {
    4.61 +    switch ( regs->ecx )
    4.62 +    {
    4.63      case MSR_EFER:
    4.64          msr_content = v->arch.hvm_vcpu.guest_efer;
    4.65          break;
    4.66 @@ -398,25 +366,12 @@ static enum handler_return long_mode_do_
    4.67  static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
    4.68  {
    4.69      u64 msr_content = regs->eax | ((u64)regs->edx << 32);
    4.70 -    struct vcpu *v = current;
    4.71  
    4.72      switch ( regs->ecx )
    4.73      {
    4.74      case MSR_EFER:
    4.75 -        /* offending reserved bit will cause #GP */
    4.76 -        if ( (msr_content & ~EFER_NX) ||
    4.77 -             (!cpu_has_nx && (msr_content & EFER_NX)) )
    4.78 -        {
    4.79 -            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    4.80 -                     "EFER: %"PRIx64"\n", msr_content);
    4.81 -            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
    4.82 +        if ( !hvm_set_efer(msr_content) )
    4.83              return HNDL_exception_raised;
    4.84 -        }
    4.85 -
    4.86 -        if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & EFER_NX )
    4.87 -            write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
    4.88 -
    4.89 -        v->arch.hvm_vcpu.guest_efer = msr_content;
    4.90          break;
    4.91  
    4.92      default:
    4.93 @@ -1096,6 +1051,10 @@ static void vmx_update_guest_efer(struct
    4.94  
    4.95      vmx_vmcs_exit(v);
    4.96  #endif
    4.97 +
    4.98 +    if ( v == current )
    4.99 +        write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
   4.100 +                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
   4.101  }
   4.102  
   4.103  static void vmx_flush_guest_tlbs(void)
     5.1 --- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Mon Oct 01 09:59:24 2007 -0600
     5.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Mon Oct 01 13:59:37 2007 -0600
     5.3 @@ -134,7 +134,7 @@ static int device_context_mapped(struct 
     5.4  #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
     5.5  #define level_size(l) (1 << level_to_offset_bits(l))
     5.6  #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
     5.7 -static struct dma_pte *addr_to_dma_pte(struct domain *domain, u64 addr)
     5.8 +static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
     5.9  {
    5.10      struct hvm_iommu *hd = domain_hvm_iommu(domain);
    5.11      struct acpi_drhd_unit *drhd;
    5.12 @@ -144,6 +144,8 @@ static struct dma_pte *addr_to_dma_pte(s
    5.13      int level = agaw_to_level(hd->agaw);
    5.14      int offset;
    5.15      unsigned long flags;
    5.16 +    struct page_info *pg = NULL;
    5.17 +    u64 *vaddr = NULL;
    5.18  
    5.19      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    5.20      iommu = drhd->iommu;
    5.21 @@ -153,79 +155,105 @@ static struct dma_pte *addr_to_dma_pte(s
    5.22      if ( !hd->pgd )
    5.23      {
    5.24          pgd = (struct dma_pte *)alloc_xenheap_page();
    5.25 -        if ( !pgd && !hd->pgd )
    5.26 +        if ( !pgd )
    5.27          {
    5.28              spin_unlock_irqrestore(&hd->mapping_lock, flags);
    5.29              return NULL;
    5.30          }
    5.31 -        memset((u8*)pgd, 0, PAGE_SIZE);
    5.32 -        if ( !hd->pgd )
    5.33 -            hd->pgd = pgd;
    5.34 -        else /* somebody is fast */
    5.35 -            free_xenheap_page((void *) pgd);
    5.36 +        memset(pgd, 0, PAGE_SIZE);
    5.37 +        hd->pgd = pgd;
    5.38      }
    5.39 +
    5.40      parent = hd->pgd;
    5.41 -    while ( level > 0 )
    5.42 +    while ( level > 1 )
    5.43      {
    5.44 -        u8 *tmp;
    5.45          offset = address_level_offset(addr, level);
    5.46          pte = &parent[offset];
    5.47 -        if ( level == 1 )
    5.48 -            break;
    5.49 +
    5.50          if ( dma_pte_addr(*pte) == 0 )
    5.51          {
    5.52 -            tmp = alloc_xenheap_page();
    5.53 -            memset(tmp, 0, PAGE_SIZE);
    5.54 -            iommu_flush_cache_page(iommu, tmp);
    5.55 -
    5.56 -            if ( !tmp && dma_pte_addr(*pte) == 0 )
    5.57 +            pg = alloc_domheap_page(NULL);
    5.58 +            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
    5.59 +            if ( !vaddr )
    5.60              {
    5.61                  spin_unlock_irqrestore(&hd->mapping_lock, flags);
    5.62                  return NULL;
    5.63              }
    5.64 -            if ( dma_pte_addr(*pte) == 0 )
    5.65 +            memset(vaddr, 0, PAGE_SIZE);
    5.66 +            iommu_flush_cache_page(iommu, vaddr);
    5.67 +
    5.68 +            dma_set_pte_addr(*pte, page_to_maddr(pg));
    5.69 +
    5.70 +            /*
     5.71 +             * higher-level table entries are always set r/w; the
     5.72 +             * last-level PTE controls the actual read/write access
    5.73 +             */
    5.74 +            dma_set_pte_readable(*pte);
    5.75 +            dma_set_pte_writable(*pte);
    5.76 +            iommu_flush_cache_entry(iommu, pte);
    5.77 +        }
    5.78 +        else
    5.79 +        {
    5.80 +            pg = maddr_to_page(pte->val);
    5.81 +            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
    5.82 +            if ( !vaddr )
    5.83              {
    5.84 -                dma_set_pte_addr(*pte,
    5.85 -                                 virt_to_maddr(tmp));
    5.86 -                /*
    5.87 -                 * high level table always sets r/w, last level
    5.88 -                 * page table control read/write
    5.89 -                 */
    5.90 -                dma_set_pte_readable(*pte);
    5.91 -                dma_set_pte_writable(*pte);
    5.92 -                iommu_flush_cache_entry(iommu, pte);
    5.93 -            } else /* somebody is fast */
    5.94 -                free_xenheap_page(tmp);
    5.95 +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
    5.96 +                return NULL;
    5.97 +            }
    5.98          }
    5.99 -        parent = maddr_to_virt(dma_pte_addr(*pte));
   5.100 +
   5.101 +        if ( parent != hd->pgd )
   5.102 +            unmap_domain_page(parent);
   5.103 +
   5.104 +        if ( level == 2 && vaddr )
   5.105 +        {
   5.106 +            unmap_domain_page(vaddr);
   5.107 +            break;
   5.108 +        }
   5.109 +
   5.110 +        parent = (struct dma_pte *)vaddr;
   5.111 +        vaddr = NULL;
   5.112          level--;
   5.113      }
   5.114 +
   5.115      spin_unlock_irqrestore(&hd->mapping_lock, flags);
   5.116 -    return pte;
   5.117 +    return pg;
   5.118  }
   5.119  
   5.120 -/* return address's pte at specific level */
   5.121 -static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
   5.122 -                                          int level)
   5.123 +/* return address's page at specific level */
   5.124 +static struct page_info *dma_addr_level_page(struct domain *domain,
   5.125 +                                             u64 addr, int level)
   5.126  {
   5.127      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   5.128      struct dma_pte *parent, *pte = NULL;
   5.129      int total = agaw_to_level(hd->agaw);
   5.130      int offset;
   5.131 +    struct page_info *pg = NULL;
   5.132  
   5.133      parent = hd->pgd;
   5.134      while ( level <= total )
   5.135      {
   5.136          offset = address_level_offset(addr, total);
   5.137          pte = &parent[offset];
   5.138 -        if ( level == total )
   5.139 -            return pte;
   5.140 +        if ( dma_pte_addr(*pte) == 0 )
   5.141 +        {
   5.142 +            if ( parent != hd->pgd )
   5.143 +                unmap_domain_page(parent);
   5.144 +            break;
   5.145 +        }
   5.146  
   5.147 -        if ( dma_pte_addr(*pte) == 0 )
   5.148 -            break;
   5.149 -        parent = maddr_to_virt(dma_pte_addr(*pte));
   5.150 +        pg = maddr_to_page(pte->val);
   5.151 +        if ( parent != hd->pgd )
   5.152 +            unmap_domain_page(parent);
   5.153 +
   5.154 +        if ( level == total )
   5.155 +            return pg;
   5.156 +
   5.157 +        parent = map_domain_page(mfn_x(page_to_mfn(pg)));
   5.158          total--;
   5.159      }
   5.160 +
   5.161      return NULL;
   5.162  }
   5.163  
   5.164 @@ -506,12 +534,16 @@ static void dma_pte_clear_one(struct dom
   5.165      struct acpi_drhd_unit *drhd;
   5.166      struct iommu *iommu;
   5.167      struct dma_pte *pte = NULL;
   5.168 +    struct page_info *pg = NULL;
   5.169  
   5.170      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   5.171  
   5.172      /* get last level pte */
   5.173 -    pte = dma_addr_level_pte(domain, addr, 1);
   5.174 -
   5.175 +    pg = dma_addr_level_page(domain, addr, 1);
   5.176 +    if ( !pg )
   5.177 +        return;
   5.178 +    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   5.179 +    pte += address_level_offset(addr, 1);
   5.180      if ( pte )
   5.181      {
   5.182          dma_clear_pte(*pte);
   5.183 @@ -559,6 +591,7 @@ void dma_pte_free_pagetable(struct domai
   5.184      int total = agaw_to_level(hd->agaw);
   5.185      int level;
   5.186      u32 tmp;
   5.187 +    struct page_info *pg = NULL;
   5.188  
   5.189      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   5.190      iommu = drhd->iommu;
   5.191 @@ -576,13 +609,16 @@ void dma_pte_free_pagetable(struct domai
   5.192  
   5.193          while ( tmp < end )
   5.194          {
   5.195 -            pte = dma_addr_level_pte(domain, tmp, level);
   5.196 -            if ( pte )
   5.197 -            {
   5.198 -                free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
   5.199 -                dma_clear_pte(*pte);
   5.200 -                iommu_flush_cache_entry(iommu, pte);
   5.201 -            }
   5.202 +            pg = dma_addr_level_page(domain, tmp, level);
   5.203 +            if ( !pg )
   5.204 +                return;
   5.205 +            pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   5.206 +            pte += address_level_offset(tmp, level);
   5.207 +            dma_clear_pte(*pte);
   5.208 +            iommu_flush_cache_entry(iommu, pte);
   5.209 +            unmap_domain_page(pte);
   5.210 +            free_domheap_page(pg);
   5.211 +
   5.212              tmp += level_size(level);
   5.213          }
   5.214          level++;
   5.215 @@ -1445,6 +1481,7 @@ int iommu_map_page(struct domain *d, pad
   5.216      struct acpi_drhd_unit *drhd;
   5.217      struct iommu *iommu;
   5.218      struct dma_pte *pte = NULL;
   5.219 +    struct page_info *pg = NULL;
   5.220  
   5.221      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   5.222      iommu = drhd->iommu;
   5.223 @@ -1453,12 +1490,15 @@ int iommu_map_page(struct domain *d, pad
   5.224      if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   5.225          return 0;
   5.226  
   5.227 -    pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
   5.228 -    if ( !pte )
   5.229 +    pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
   5.230 +    if ( !pg )
   5.231          return -ENOMEM;
   5.232 +    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
    5.233 +    pte += gfn & LEVEL_MASK;
   5.234      dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
   5.235      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
   5.236      iommu_flush_cache_entry(iommu, pte);
   5.237 +    unmap_domain_page(pte);
   5.238  
   5.239      for_each_drhd_unit ( drhd )
   5.240      {
   5.241 @@ -1477,7 +1517,6 @@ int iommu_unmap_page(struct domain *d, d
   5.242  {
   5.243      struct acpi_drhd_unit *drhd;
   5.244      struct iommu *iommu;
   5.245 -    struct dma_pte *pte = NULL;
   5.246  
   5.247      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   5.248      iommu = drhd->iommu;
   5.249 @@ -1486,10 +1525,8 @@ int iommu_unmap_page(struct domain *d, d
   5.250      if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   5.251          return 0;
   5.252  
   5.253 -    /* get last level pte */
   5.254 -    pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
   5.255      dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
   5.256 -    
   5.257 +
   5.258      return 0;
   5.259  }
   5.260  
   5.261 @@ -1501,6 +1538,7 @@ int iommu_page_mapping(struct domain *do
   5.262      unsigned long start_pfn, end_pfn;
   5.263      struct dma_pte *pte = NULL;
   5.264      int index;
   5.265 +    struct page_info *pg = NULL;
   5.266  
   5.267      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   5.268      iommu = drhd->iommu;
   5.269 @@ -1513,12 +1551,15 @@ int iommu_page_mapping(struct domain *do
   5.270      index = 0;
   5.271      while ( start_pfn < end_pfn )
   5.272      {
   5.273 -        pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
   5.274 -        if ( !pte )
   5.275 +        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
   5.276 +        if ( !pg )
   5.277              return -ENOMEM;
   5.278 +        pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   5.279 +        pte += start_pfn & LEVEL_MASK;
   5.280          dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
   5.281          dma_set_pte_prot(*pte, prot);
   5.282          iommu_flush_cache_entry(iommu, pte);
   5.283 +        unmap_domain_page(pte);
   5.284          start_pfn++;
   5.285          index++;
   5.286      }
   5.287 @@ -1537,12 +1578,8 @@ int iommu_page_mapping(struct domain *do
   5.288  
   5.289  int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
   5.290  {
   5.291 -    struct dma_pte *pte = NULL;
   5.292 +    dma_pte_clear_range(domain, addr, addr + size);
   5.293  
   5.294 -    /* get last level pte */
   5.295 -    pte = dma_addr_level_pte(domain, addr, 1);
   5.296 -    dma_pte_clear_range(domain, addr, addr + size);
   5.297 -    
   5.298      return 0;
   5.299  }
   5.300  
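
The recurring change in this file is that VT-d page-table pages now come from
the domheap rather than the xenheap, so every PTE update has to map the page,
modify and flush the entry, then unmap it. A condensed sketch of that idiom,
using only helpers that already appear in the hunks above (illustrative only,
not a function added by this changeset):

    /* Sketch: write one last-level DMA PTE in a domheap-backed table.
     * 'pg' is the last-level page returned by addr_to_dma_page(). */
    static void sketch_set_dma_pte(struct iommu *iommu, struct page_info *pg,
                                   u64 gfn, u64 mfn)
    {
        struct dma_pte *pte;

        pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
        pte += gfn & LEVEL_MASK;                      /* index within the 4K table */
        dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K); /* point the entry at the target frame */
        dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
        iommu_flush_cache_entry(iommu, pte);          /* keep the IOMMU-visible copy coherent */
        unmap_domain_page(pte);                       /* drop the transient mapping */
    }
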
     6.1 --- a/xen/arch/x86/mm/paging.c	Mon Oct 01 09:59:24 2007 -0600
     6.2 +++ b/xen/arch/x86/mm/paging.c	Mon Oct 01 13:59:37 2007 -0600
     6.3 @@ -28,9 +28,7 @@
     6.4  #include <asm/guest_access.h>
     6.5  #include <xsm/xsm.h>
     6.6  
     6.7 -/* Xen command-line option to enable hardware-assisted paging */
     6.8 -int opt_hap_enabled;
     6.9 -boolean_param("hap", opt_hap_enabled);
    6.10 +#define hap_enabled(d) (hvm_funcs.hap_supported && is_hvm_domain(d))
    6.11  
    6.12  /* Printouts */
    6.13  #define PAGING_PRINTK(_f, _a...)                                     \
    6.14 @@ -363,14 +361,14 @@ void paging_domain_init(struct domain *d
    6.15      shadow_domain_init(d);
    6.16  
    6.17      /* ... but we will use hardware assistance if it's available. */
    6.18 -    if ( opt_hap_enabled && is_hvm_domain(d) )
    6.19 +    if ( hap_enabled(d) )
    6.20          hap_domain_init(d);
    6.21  }
    6.22  
    6.23  /* vcpu paging struct initialization goes here */
    6.24  void paging_vcpu_init(struct vcpu *v)
    6.25  {
    6.26 -    if ( opt_hap_enabled && is_hvm_vcpu(v) )
    6.27 +    if ( hap_enabled(v->domain) )
    6.28          hap_vcpu_init(v);
    6.29      else
    6.30          shadow_vcpu_init(v);
    6.31 @@ -434,7 +432,7 @@ int paging_domctl(struct domain *d, xen_
    6.32      }
    6.33  
    6.34      /* Here, dispatch domctl to the appropriate paging code */
    6.35 -    if ( opt_hap_enabled && is_hvm_domain(d) )
    6.36 +    if ( hap_enabled(d) )
    6.37          return hap_domctl(d, sc, u_domctl);
    6.38      else
    6.39          return shadow_domctl(d, sc, u_domctl);
    6.40 @@ -443,7 +441,7 @@ int paging_domctl(struct domain *d, xen_
    6.41  /* Call when destroying a domain */
    6.42  void paging_teardown(struct domain *d)
    6.43  {
    6.44 -    if ( opt_hap_enabled && is_hvm_domain(d) )
    6.45 +    if ( hap_enabled(d) )
    6.46          hap_teardown(d);
    6.47      else
    6.48          shadow_teardown(d);
    6.49 @@ -455,7 +453,7 @@ void paging_teardown(struct domain *d)
    6.50  /* Call once all of the references to the domain have gone away */
    6.51  void paging_final_teardown(struct domain *d)
    6.52  {
    6.53 -    if ( opt_hap_enabled && is_hvm_domain(d) )
    6.54 +    if ( hap_enabled(d) )
    6.55          hap_final_teardown(d);
    6.56      else
    6.57          shadow_final_teardown(d);
    6.58 @@ -465,7 +463,7 @@ void paging_final_teardown(struct domain
    6.59   * creation. */
    6.60  int paging_enable(struct domain *d, u32 mode)
    6.61  {
    6.62 -    if ( opt_hap_enabled && is_hvm_domain(d) )
    6.63 +    if ( hap_enabled(d) )
    6.64          return hap_enable(d, mode | PG_HAP_enable);
    6.65      else
    6.66          return shadow_enable(d, mode | PG_SH_enable);
     7.1 --- a/xen/include/asm-x86/hvm/hvm.h	Mon Oct 01 09:59:24 2007 -0600
     7.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Mon Oct 01 13:59:37 2007 -0600
     7.3 @@ -72,6 +72,9 @@ enum hvm_intack {
     7.4  struct hvm_function_table {
     7.5      char *name;
     7.6  
     7.7 +    /* Support Hardware-Assisted Paging? */
     7.8 +    int hap_supported;
     7.9 +
    7.10      /*
    7.11       * Initialise/destroy HVM domain/vcpu resources
    7.12       */
     8.1 --- a/xen/include/asm-x86/hvm/support.h	Mon Oct 01 09:59:24 2007 -0600
     8.2 +++ b/xen/include/asm-x86/hvm/support.h	Mon Oct 01 13:59:37 2007 -0600
     8.3 @@ -234,6 +234,7 @@ int hvm_do_hypercall(struct cpu_user_reg
     8.4  void hvm_hlt(unsigned long rflags);
     8.5  void hvm_triple_fault(void);
     8.6  
     8.7 +int hvm_set_efer(uint64_t value);
     8.8  int hvm_set_cr0(unsigned long value);
     8.9  int hvm_set_cr3(unsigned long value);
    8.10  int hvm_set_cr4(unsigned long value);
     9.1 --- a/xen/include/xsm/acm/acm_core.h	Mon Oct 01 09:59:24 2007 -0600
     9.2 +++ b/xen/include/xsm/acm/acm_core.h	Mon Oct 01 13:59:37 2007 -0600
     9.3 @@ -154,7 +154,7 @@ static inline int acm_array_append_tuple
     9.4  
     9.5  /* protos */
     9.6  int acm_init_domain_ssid(struct domain *, ssidref_t ssidref);
     9.7 -void acm_free_domain_ssid(struct acm_ssid_domain *ssid);
     9.8 +void acm_free_domain_ssid(struct domain *);
     9.9  int acm_init_binary_policy(u32 policy_code);
    9.10  int acm_set_policy(XEN_GUEST_HANDLE_64(void) buf, u32 buf_size);
    9.11  int do_acm_set_policy(void *buf, u32 buf_size, int is_bootpolicy,
    10.1 --- a/xen/include/xsm/acm/acm_hooks.h	Mon Oct 01 09:59:24 2007 -0600
    10.2 +++ b/xen/include/xsm/acm/acm_hooks.h	Mon Oct 01 13:59:37 2007 -0600
    10.3 @@ -258,7 +258,7 @@ static inline void acm_domain_destroy(st
    10.4              acm_secondary_ops->domain_destroy(ssid, d);
    10.5          /* free security ssid for the destroyed domain (also if null policy */
    10.6          acm_domain_ssid_off_list(ssid);
    10.7 -        acm_free_domain_ssid((struct acm_ssid_domain *)(ssid));
    10.8 +        acm_free_domain_ssid(d);
    10.9      }
   10.10  }
   10.11  
   10.12 @@ -294,7 +294,7 @@ static inline int acm_domain_create(stru
   10.13      {
   10.14          acm_domain_ssid_onto_list(d->ssid);
   10.15      } else {
   10.16 -        acm_free_domain_ssid(d->ssid);
   10.17 +        acm_free_domain_ssid(d);
   10.18      }
   10.19  
   10.20  error_out:
    11.1 --- a/xen/xsm/acm/acm_core.c	Mon Oct 01 09:59:24 2007 -0600
    11.2 +++ b/xen/xsm/acm/acm_core.c	Mon Oct 01 13:59:37 2007 -0600
    11.3 @@ -361,7 +361,7 @@ int acm_init_domain_ssid(struct domain *
    11.4      {
    11.5          printk("%s: ERROR instantiating individual ssids for domain 0x%02x.\n",
    11.6                 __func__, subj->domain_id);
    11.7 -        acm_free_domain_ssid(ssid);
    11.8 +        acm_free_domain_ssid(subj);
    11.9          return ACM_INIT_SSID_ERROR;
   11.10      }
   11.11  
   11.12 @@ -372,8 +372,10 @@ int acm_init_domain_ssid(struct domain *
   11.13  
   11.14  
   11.15  void
   11.16 -acm_free_domain_ssid(struct acm_ssid_domain *ssid)
   11.17 +acm_free_domain_ssid(struct domain *d)
   11.18  {
   11.19 +    struct acm_ssid_domain *ssid = d->ssid;
   11.20 +    
   11.21      /* domain is already gone, just ssid is left */
   11.22      if (ssid == NULL)
   11.23          return;
   11.24 @@ -387,6 +389,8 @@ acm_free_domain_ssid(struct acm_ssid_dom
   11.25      ssid->secondary_ssid = NULL;
   11.26  
   11.27      xfree(ssid);
   11.28 +    d->ssid = NULL;
   11.29 +    
   11.30      printkd("%s: Freed individual domain ssid (domain=%02x).\n",
   11.31              __func__, id);
   11.32  }