direct-io.hg

changeset 4652:d769cf21930d

bitkeeper revision 1.1379 (426ca278F5TOjFztt77FKYt8v457dg)

[PATCH] Assorted VMX patches

This patch contains the following VMX patches:

- ioemu: Update the address when doing a stosb/stosw/stosl. Without this patch
stosb writes req->count times at the same location req->addr. Not exactly
the intended semantics.

- xc_ptrace.c: Use the page_array to refer to partition memory. This allows
you to use gdbserver on a VMX partition (and presumably partitions that use
shadow page tables).

- dom0_ops.c: Preserve ECF_VMX_GUEST flag for gdbserver. Without it you
cannot (re)set the VMX domain state.

- vmx.c: Added support for lmsw. Unmodified FreeBSD uses this when running
inside a VMX partition.

Signed-off-by: Leendert van Doorn <leendert@watson.ibm.com>

===== tools/ioemu/iodev/cpu.cc 1.8 vs edited =====
author leendert@watson.ibm.com[kaf24]
date Mon Apr 25 07:55:36 2005 +0000 (2005-04-25)
parents 3b385d58d823
children 4c9072ada4bf
files tools/ioemu/iodev/cpu.cc tools/libxc/xc_ptrace.c xen/arch/x86/dom0_ops.c xen/arch/x86/vmx.c xen/include/asm-x86/vmx.h
line diff
     1.1 --- a/tools/ioemu/iodev/cpu.cc	Mon Apr 25 07:55:26 2005 +0000
     1.2 +++ b/tools/ioemu/iodev/cpu.cc	Mon Apr 25 07:55:36 2005 +0000
     1.3 @@ -128,15 +128,13 @@ void bx_cpu_c::dispatch_ioreq(ioreq_t *r
     1.4  		if (!req->pdata_valid) {
     1.5  			if(req->dir == IOREQ_READ){//read
     1.6  				//BX_INFO(("mmio[value]: <READ> addr:%llx, value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
     1.7 -
     1.8  				for (i = 0; i < req->count; i++) {
     1.9 -					BX_MEM_READ_PHYSICAL(req->addr, req->size, &req->u.data);
    1.10 +					BX_MEM_READ_PHYSICAL(req->addr + (sign * i * req->size), req->size, &req->u.data);
    1.11  				}
    1.12  			} else if(req->dir == IOREQ_WRITE) {//write
    1.13  				//BX_INFO(("mmio[value]: <WRITE> addr:%llx, value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
    1.14 -
    1.15  				for (i = 0; i < req->count; i++) {
    1.16 -					BX_MEM_WRITE_PHYSICAL(req->addr, req->size, &req->u.data);
    1.17 +					BX_MEM_WRITE_PHYSICAL(req->addr + (sign * i * req->size), req->size, &req->u.data);
    1.18  				}
    1.19  			}
    1.20  		} else {
     2.1 --- a/tools/libxc/xc_ptrace.c	Mon Apr 25 07:55:26 2005 +0000
     2.2 +++ b/tools/libxc/xc_ptrace.c	Mon Apr 25 07:55:36 2005 +0000
     2.3 @@ -75,7 +75,7 @@ struct gdb_regs {
     2.4  	int retval = xc_domain_getfullinfo(xc_handle, domid, cpu, NULL, &ctxt[cpu]); \
     2.5  	if (retval) \
     2.6  	    goto error_out; \
     2.7 -	cr3[cpu] = ctxt[cpu].pt_base; \
     2.8 +	cr3[cpu] = ctxt[cpu].pt_base; /* physical address */ \
     2.9  	regs_valid[cpu] = 1; \
    2.10      } \
    2.11  
    2.12 @@ -128,11 +128,12 @@ struct gdb_regs {
    2.13  
    2.14  
    2.15  static int                      xc_handle;
    2.16 +static long			nr_pages = 0;
    2.17 +unsigned long			*page_array = NULL;
    2.18  static int                      regs_valid[MAX_VIRT_CPUS];
    2.19  static unsigned long            cr3[MAX_VIRT_CPUS];
    2.20  static full_execution_context_t ctxt[MAX_VIRT_CPUS];
    2.21  
    2.22 -
    2.23  /* --------------------- */
    2.24  
    2.25  static void *
    2.26 @@ -140,6 +141,7 @@ map_domain_va(unsigned long domid, int c
    2.27  {
    2.28      unsigned long pde, page;
    2.29      unsigned long va = (unsigned long)guest_va;
    2.30 +    long npgs = xc_get_tot_pages(xc_handle, domid);
    2.31  
    2.32      static unsigned long  cr3_phys[MAX_VIRT_CPUS];
    2.33      static unsigned long *cr3_virt[MAX_VIRT_CPUS];
    2.34 @@ -150,6 +152,21 @@ map_domain_va(unsigned long domid, int c
    2.35      
    2.36      static int            prev_perm[MAX_VIRT_CPUS];
    2.37  
    2.38 +    if (nr_pages != npgs) {
    2.39 +	if (nr_pages > 0)
    2.40 +	    free(page_array);
    2.41 +	nr_pages = npgs;
    2.42 +	if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
    2.43 +	    printf("Could not allocate memory\n");
    2.44 +	    goto error_out;
    2.45 +	}
    2.46 +
    2.47 +	if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
    2.48 +		printf("Could not get the page frame list\n");
    2.49 +		goto error_out;
    2.50 +	}
    2.51 +    }
    2.52 +
    2.53      FETCH_REGS(cpu);
    2.54  
    2.55      if (cr3[cpu] != cr3_phys[cpu]) 
    2.56 @@ -162,8 +179,9 @@ map_domain_va(unsigned long domid, int c
    2.57  					     cr3_phys[cpu] >> PAGE_SHIFT)) == NULL)
    2.58  	    goto error_out;
    2.59      } 
    2.60 -    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0)
    2.61 +    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
    2.62  	goto error_out;
    2.63 +    pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
    2.64      if (pde != pde_phys[cpu]) 
    2.65      {
    2.66  	pde_phys[cpu] = pde;
    2.67 @@ -174,8 +192,9 @@ map_domain_va(unsigned long domid, int c
    2.68  					     pde_phys[cpu] >> PAGE_SHIFT)) == NULL)
    2.69  	    goto error_out;
    2.70      }
    2.71 -    if ((page = pde_virt[cpu][vtopti(va)]) == 0)
    2.72 +    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
    2.73  	goto error_out;
    2.74 +    page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
    2.75      if (page != page_phys[cpu] || perm != prev_perm[cpu]) 
    2.76      {
    2.77  	page_phys[cpu] = page;
    2.78 @@ -330,6 +349,7 @@ xc_ptrace(enum __ptrace_request request,
    2.79  	    perror("dom0 op failed");
    2.80  	    goto error_out;
    2.81  	}
    2.82 +    	/* FALLTHROUGH */
    2.83      case PTRACE_CONT:
    2.84      case PTRACE_DETACH:
    2.85  	if (request != PTRACE_SINGLESTEP) {
     3.1 --- a/xen/arch/x86/dom0_ops.c	Mon Apr 25 07:55:26 2005 +0000
     3.2 +++ b/xen/arch/x86/dom0_ops.c	Mon Apr 25 07:55:36 2005 +0000
     3.3 @@ -402,6 +402,10 @@ void arch_getdomaininfo_ctxt(
     3.4          c->flags |= ECF_I387_VALID;
     3.5      if ( KERNEL_MODE(ed, &ed->arch.user_ctxt) )
     3.6          c->flags |= ECF_IN_KERNEL;
     3.7 +#ifdef CONFIG_VMX
     3.8 +    if (VMX_DOMAIN(ed))
     3.9 +        c->flags |= ECF_VMX_GUEST;
    3.10 +#endif
    3.11      memcpy(&c->fpu_ctxt,
    3.12             &ed->arch.i387,
    3.13             sizeof(ed->arch.i387));
     4.1 --- a/xen/arch/x86/vmx.c	Mon Apr 25 07:55:26 2005 +0000
     4.2 +++ b/xen/arch/x86/vmx.c	Mon Apr 25 07:55:36 2005 +0000
     4.3 @@ -640,6 +640,84 @@ error:
     4.4      return 0;
     4.5  }
     4.6  
     4.7 +static int vmx_set_cr0(unsigned long value)
     4.8 +{
     4.9 +    struct exec_domain *d = current;
    4.10 +    unsigned long old_base_mfn, mfn;
    4.11 +    unsigned long eip;
    4.12 +
    4.13 +    /* 
    4.14 +     * CR0: We don't want to lose PE and PG.
    4.15 +     */
    4.16 +    __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
    4.17 +
    4.18 +    if (value & (X86_CR0_PE | X86_CR0_PG) &&
    4.19 +        !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
    4.20 +        /*
    4.21 +         * Enable paging
    4.22 +         */
    4.23 +        set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
    4.24 +        /*
    4.25 +         * The guest CR3 must be pointing to the guest physical.
    4.26 +         */
    4.27 +        if ( !VALID_MFN(mfn = phys_to_machine_mapping(
    4.28 +                            d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
    4.29 +             !get_page(pfn_to_page(mfn), d->domain) )
    4.30 +        {
    4.31 +            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
    4.32 +                        d->arch.arch_vmx.cpu_cr3);
    4.33 +            domain_crash_synchronous(); /* need to take a clean path */
    4.34 +        }
    4.35 +        old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
    4.36 +        if (old_base_mfn)
    4.37 +            put_page(pfn_to_page(old_base_mfn));
    4.38 +
    4.39 +        /*
    4.40 +         * Now arch.guest_table points to machine physical.
    4.41 +         */
    4.42 +        d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
    4.43 +        update_pagetables(d);
    4.44 +
    4.45 +        VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
    4.46 +                (unsigned long) (mfn << PAGE_SHIFT));
    4.47 +
    4.48 +        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
    4.49 +        /* 
    4.50 +         * arch->shadow_table should hold the next CR3 for shadow
    4.51 +         */
    4.52 +        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 
    4.53 +                d->arch.arch_vmx.cpu_cr3, mfn);
    4.54 +    } else {
    4.55 +        if ((value & X86_CR0_PE) == 0) {
    4.56 +            __vmread(GUEST_EIP, &eip);
    4.57 +            VMX_DBG_LOG(DBG_LEVEL_1,
    4.58 +		"Disabling CR0.PE at %%eip 0x%lx", eip);
    4.59 +	    if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
    4.60 +		set_bit(VMX_CPU_STATE_ASSIST_ENABLED,
    4.61 +					&d->arch.arch_vmx.cpu_state);
    4.62 +		__vmread(GUEST_EIP, &eip);
    4.63 +		VMX_DBG_LOG(DBG_LEVEL_1,
    4.64 +		    "Transfering control to vmxassist %%eip 0x%lx", eip);
    4.65 +		return 0; /* do not update eip! */
    4.66 +	    }
    4.67 +	} else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
    4.68 +					&d->arch.arch_vmx.cpu_state)) {
    4.69 +	    __vmread(GUEST_EIP, &eip);
    4.70 +	    VMX_DBG_LOG(DBG_LEVEL_1,
    4.71 +		"Enabling CR0.PE at %%eip 0x%lx", eip);
    4.72 +	    if (vmx_assist(d, VMX_ASSIST_RESTORE)) {
    4.73 +		clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
    4.74 +					&d->arch.arch_vmx.cpu_state);
    4.75 +		__vmread(GUEST_EIP, &eip);
    4.76 +		VMX_DBG_LOG(DBG_LEVEL_1,
    4.77 +		    "Restoring to %%eip 0x%lx", eip);
    4.78 +		return 0; /* do not update eip! */
    4.79 +	    }
    4.80 +	}
    4.81 +    }
    4.82 +    return 1;
    4.83 +}
    4.84 +
    4.85  #define CASE_GET_REG(REG, reg)  \
    4.86      case REG_ ## REG: value = regs->reg; break
    4.87  
    4.88 @@ -650,7 +728,6 @@ static int mov_to_cr(int gp, int cr, str
    4.89  {
    4.90      unsigned long value;
    4.91      unsigned long old_cr;
    4.92 -    unsigned long eip;
    4.93      struct exec_domain *d = current;
    4.94  
    4.95      switch (gp) {
    4.96 @@ -675,80 +752,8 @@ static int mov_to_cr(int gp, int cr, str
    4.97      switch(cr) {
    4.98      case 0: 
    4.99      {
   4.100 -        unsigned long old_base_mfn, mfn;
   4.101 -
   4.102 -        /* 
   4.103 -         * CR0:
   4.104 -         * We don't want to lose PE and PG.
   4.105 -         */
   4.106 -        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
   4.107 -        __vmwrite(CR0_READ_SHADOW, value);
   4.108 -
   4.109 -        if (value & (X86_CR0_PE | X86_CR0_PG) &&
   4.110 -            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
   4.111 -            /*
   4.112 -             * Enable paging
   4.113 -             */
   4.114 -            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
   4.115 -            /*
   4.116 -             * The guest CR3 must be pointing to the guest physical.
   4.117 -             */
   4.118 -            if ( !VALID_MFN(mfn = phys_to_machine_mapping(
   4.119 -                                d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
   4.120 -                 !get_page(pfn_to_page(mfn), d->domain) )
   4.121 -            {
   4.122 -                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
   4.123 -                            d->arch.arch_vmx.cpu_cr3);
   4.124 -                domain_crash_synchronous(); /* need to take a clean path */
   4.125 -            }
   4.126 -            old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
   4.127 -            if ( old_base_mfn )
   4.128 -                put_page(pfn_to_page(old_base_mfn));
   4.129 -
   4.130 -            /*
   4.131 -             * Now arch.guest_table points to machine physical.
   4.132 -             */
   4.133 -            d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
   4.134 -            update_pagetables(d);
   4.135 -
   4.136 -            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
   4.137 -                    (unsigned long) (mfn << PAGE_SHIFT));
   4.138 -
   4.139 -            __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
   4.140 -            /* 
   4.141 -             * arch->shadow_table should hold the next CR3 for shadow
   4.142 -             */
   4.143 -            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 
   4.144 -                    d->arch.arch_vmx.cpu_cr3, mfn);
   4.145 -        } else {
   4.146 -            if ((value & X86_CR0_PE) == 0) {
   4.147 -	        __vmread(GUEST_EIP, &eip);
   4.148 -                VMX_DBG_LOG(DBG_LEVEL_1,
   4.149 -			"Disabling CR0.PE at %%eip 0x%lx", eip);
   4.150 -		if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
   4.151 -		    set_bit(VMX_CPU_STATE_ASSIST_ENABLED,
   4.152 -						&d->arch.arch_vmx.cpu_state);
   4.153 -	            __vmread(GUEST_EIP, &eip);
   4.154 -		    VMX_DBG_LOG(DBG_LEVEL_1,
   4.155 -			"Transfering control to vmxassist %%eip 0x%lx", eip);
   4.156 -		    return 0; /* do not update eip! */
   4.157 -		}
   4.158 -	    } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
   4.159 -					&d->arch.arch_vmx.cpu_state)) {
   4.160 -		__vmread(GUEST_EIP, &eip);
   4.161 -		VMX_DBG_LOG(DBG_LEVEL_1,
   4.162 -			"Enabling CR0.PE at %%eip 0x%lx", eip);
   4.163 -		if (vmx_assist(d, VMX_ASSIST_RESTORE)) {
   4.164 -		    clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
   4.165 -						&d->arch.arch_vmx.cpu_state);
   4.166 -		    __vmread(GUEST_EIP, &eip);
   4.167 -		    VMX_DBG_LOG(DBG_LEVEL_1,
   4.168 -			"Restoring to %%eip 0x%lx", eip);
   4.169 -		    return 0; /* do not update eip! */
   4.170 -		}
   4.171 -	    }
   4.172 -	}
   4.173 -        break;
   4.174 +	__vmwrite(CR0_READ_SHADOW, value);
   4.175 +	return vmx_set_cr0(value);
   4.176      }
   4.177      case 3: 
   4.178      {
   4.179 @@ -790,8 +795,8 @@ static int mov_to_cr(int gp, int cr, str
   4.180                  domain_crash_synchronous(); /* need to take a clean path */
   4.181              }
   4.182              old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
   4.183 -            d->arch.guest_table  = mk_pagetable(mfn << PAGE_SHIFT);
   4.184 -            if ( old_base_mfn )
   4.185 +            d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
   4.186 +            if (old_base_mfn)
   4.187                  put_page(pfn_to_page(old_base_mfn));
   4.188              update_pagetables(d);
   4.189              /* 
   4.190 @@ -893,6 +898,13 @@ static int vmx_cr_access(unsigned long e
   4.191          value &= ~X86_CR0_TS; /* clear TS */
   4.192          __vmwrite(CR0_READ_SHADOW, value);
   4.193          break;
   4.194 +    case TYPE_LMSW:
   4.195 +        __vmwrite(CR0_READ_SHADOW, value);
   4.196 +	value = (value & ~0xF) |
   4.197 +		(((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF) |
   4.198 +		1 /* CR0.PE == 1 */;
   4.199 +	return vmx_set_cr0(value);
   4.200 +        break;
   4.201      default:
   4.202          __vmx_bug(regs);
   4.203          break;
     5.1 --- a/xen/include/asm-x86/vmx.h	Mon Apr 25 07:55:26 2005 +0000
     5.2 +++ b/xen/include/asm-x86/vmx.h	Mon Apr 25 07:55:36 2005 +0000
     5.3 @@ -89,6 +89,7 @@ extern unsigned int cpu_rev;
     5.4  #define TYPE_MOV_TO_CR                  (0 << 4) 
     5.5  #define TYPE_MOV_FROM_CR                (1 << 4)
     5.6  #define TYPE_CLTS                       (2 << 4)
     5.7 +#define	TYPE_LMSW			(3 << 4)
     5.8  #define CONTROL_REG_ACCESS_REG          0x700   /* 10:8, general purpose register */
     5.9  #define REG_EAX                         (0 << 8) 
    5.10  #define REG_ECX                         (1 << 8) 
    5.11 @@ -98,6 +99,7 @@ extern unsigned int cpu_rev;
    5.12  #define REG_EBP                         (5 << 8) 
    5.13  #define REG_ESI                         (6 << 8) 
    5.14  #define REG_EDI                         (7 << 8) 
    5.15 +#define	LMSW_SOURCE_DATA		(0xFFFF << 16) /* 16:31 lmsw source */
    5.16  
    5.17  /*
    5.18   * Exit Qualifications for MOV for Debug Register Access