ia64/xen-unstable

changeset 6612:20140d3fbf83

Attached are the patches for the new ioemu communication mechanism. The new
mechanism provides richer I/O operation semantics, such as and, or, and xor
operations on MMIO space. This is necessary for operating systems such
as Windows XP and Windows 2003.

This is the second part of a two part patch. This patch applies to xen.

Signed-Off-By: Leendert van Doorn <leendert@watson.ibm.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Sep 02 17:54:34 2005 +0000 (2005-09-02)
parents ed474440decd
children 0746ef61733b
files xen/arch/x86/vmx.c xen/arch/x86/vmx_intercept.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_platform.c xen/include/asm-x86/vmx_platform.h xen/include/public/io/ioreq.h
line diff
     1.1 --- a/xen/arch/x86/vmx.c	Fri Sep 02 17:53:52 2005 +0000
     1.2 +++ b/xen/arch/x86/vmx.c	Fri Sep 02 17:54:34 2005 +0000
     1.3 @@ -602,16 +602,67 @@ static int check_for_null_selector(unsig
     1.4      return 0;
     1.5  }
     1.6  
     1.7 +void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
     1.8 +       unsigned long count, int size, long value, int dir, int pvalid)
     1.9 +{
    1.10 +    struct vcpu *v = current;
    1.11 +    vcpu_iodata_t *vio;
    1.12 +    ioreq_t *p;
    1.13 +
    1.14 +    vio = get_vio(v->domain, v->vcpu_id);
    1.15 +    if (vio == NULL) {
    1.16 +        printk("bad shared page: %lx\n", (unsigned long) vio);
    1.17 +        domain_crash_synchronous();
    1.18 +    }
    1.19 +
    1.20 +    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
    1.21 +       printf("VMX I/O has not yet completed\n");
    1.22 +       domain_crash_synchronous();
    1.23 +    }
    1.24 +    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
    1.25 +
    1.26 +    p = &vio->vp_ioreq;
    1.27 +    p->dir = dir;
    1.28 +    p->pdata_valid = pvalid;
    1.29 +
    1.30 +    p->type = IOREQ_TYPE_PIO;
    1.31 +    p->size = size;
    1.32 +    p->addr = port;
    1.33 +    p->count = count;
    1.34 +    p->df = regs->eflags & EF_DF ? 1 : 0;
    1.35 +
    1.36 +    if (pvalid) {
    1.37 +        if (vmx_paging_enabled(current))
    1.38 +            p->u.pdata = (void *) gva_to_gpa(value);
    1.39 +        else
    1.40 +            p->u.pdata = (void *) value; /* guest VA == guest PA */
    1.41 +    } else
    1.42 +        p->u.data = value;
    1.43 +
    1.44 +    p->state = STATE_IOREQ_READY;
    1.45 +
    1.46 +    if (vmx_portio_intercept(p)) {
    1.47 +        /* no blocking & no evtchn notification */
    1.48 +        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
    1.49 +        return;
    1.50 +    }
    1.51 +
    1.52 +    evtchn_send(iopacket_port(v->domain));
    1.53 +    vmx_wait_io();
    1.54 +}
    1.55 +
    1.56  static void vmx_io_instruction(struct cpu_user_regs *regs, 
    1.57                     unsigned long exit_qualification, unsigned long inst_len) 
    1.58  {
    1.59 -    struct vcpu *d = current;
    1.60 -    vcpu_iodata_t *vio;
    1.61 -    ioreq_t *p;
    1.62 -    unsigned long addr;
    1.63 +    struct mi_per_cpu_info *mpcip;
    1.64      unsigned long eip, cs, eflags;
    1.65 +    unsigned long port, size, dir;
    1.66      int vm86;
    1.67  
    1.68 +    mpcip = &current->domain->arch.vmx_platform.mpci;
    1.69 +    mpcip->instr = INSTR_PIO;
    1.70 +    mpcip->flags = 0;
    1.71 +
    1.72      __vmread(GUEST_RIP, &eip);
    1.73      __vmread(GUEST_CS_SELECTOR, &cs);
    1.74      __vmread(GUEST_RFLAGS, &eflags);
    1.75 @@ -623,80 +674,57 @@ static void vmx_io_instruction(struct cp
    1.76                  vm86, cs, eip, exit_qualification);
    1.77  
    1.78      if (test_bit(6, &exit_qualification))
    1.79 -        addr = (exit_qualification >> 16) & (0xffff);
    1.80 +        port = (exit_qualification >> 16) & 0xFFFF;
    1.81      else
    1.82 -        addr = regs->edx & 0xffff;
    1.83 -    TRACE_VMEXIT (2,addr);
    1.84 -
    1.85 -    vio = get_vio(d->domain, d->vcpu_id);
    1.86 -    if (vio == 0) {
    1.87 -        printk("bad shared page: %lx", (unsigned long) vio);
    1.88 -        domain_crash_synchronous(); 
    1.89 -    }
    1.90 -    p = &vio->vp_ioreq;
    1.91 -    p->dir = test_bit(3, &exit_qualification); /* direction */
    1.92 -
    1.93 -    p->pdata_valid = 0;
    1.94 -    p->count = 1;
    1.95 -    p->size = (exit_qualification & 7) + 1;
    1.96 +        port = regs->edx & 0xffff;
    1.97 +    TRACE_VMEXIT(2, port);
    1.98 +    size = (exit_qualification & 7) + 1;
    1.99 +    dir = test_bit(3, &exit_qualification); /* direction */
   1.100  
   1.101      if (test_bit(4, &exit_qualification)) { /* string instruction */
   1.102 -	unsigned long laddr;
   1.103 +	unsigned long addr, count = 1;
   1.104 +	int sign = regs->eflags & EF_DF ? -1 : 1;
   1.105  
   1.106 -	__vmread(GUEST_LINEAR_ADDRESS, &laddr);
   1.107 +	__vmread(GUEST_LINEAR_ADDRESS, &addr);
   1.108 +
   1.109          /*
   1.110           * In protected mode, guest linear address is invalid if the
   1.111           * selector is null.
   1.112           */
   1.113 -        if (!vm86 && check_for_null_selector(eip)) {
   1.114 -            laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
   1.115 -        }
   1.116 -        p->pdata_valid = 1;
   1.117 -
   1.118 -        p->u.data = laddr;
   1.119 -        if (vmx_paging_enabled(d))
   1.120 -                p->u.pdata = (void *) gva_to_gpa(p->u.data);
   1.121 -        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
   1.122 -
   1.123 -        if (test_bit(5, &exit_qualification)) /* "rep" prefix */
   1.124 -            p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
   1.125 +        if (!vm86 && check_for_null_selector(eip))
   1.126 +            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
   1.127  
   1.128 -        /*
   1.129 -         * Split up string I/O operations that cross page boundaries. Don't
   1.130 -         * advance %eip so that "rep insb" will restart at the next page.
   1.131 -         */
   1.132 -        if ((p->u.data & PAGE_MASK) != 
   1.133 -		((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
   1.134 -	    VMX_DBG_LOG(DBG_LEVEL_2,
   1.135 -		"String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n",
   1.136 -		cs, eip);
   1.137 -            if (p->u.data & (p->size - 1)) {
   1.138 -		printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n",
   1.139 -			cs, eip);
   1.140 -                domain_crash_synchronous();     
   1.141 -            }
   1.142 -            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
   1.143 -        } else {
   1.144 -            __update_guest_eip(inst_len);
   1.145 -        }
   1.146 -    } else if (p->dir == IOREQ_WRITE) {
   1.147 -        p->u.data = regs->eax;
   1.148 +        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
   1.149 +	    mpcip->flags |= REPZ;
   1.150 +	    count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
   1.151 +	}
   1.152 +
   1.153 +	/*
   1.154 +	 * Handle string pio instructions that cross pages or that
   1.155 +	 * are unaligned. See the comments in vmx_platform.c/handle_mmio()
   1.156 +	 */
   1.157 +	if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
   1.158 +	    unsigned long value = 0;
   1.159 +
   1.160 +	    mpcip->flags |= OVERLAP;
   1.161 +	    if (dir == IOREQ_WRITE)
   1.162 +		vmx_copy(&value, addr, size, VMX_COPY_IN);
   1.163 +	    send_pio_req(regs, port, 1, size, value, dir, 0);
   1.164 +	} else {
   1.165 +	    if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
   1.166 +                if (sign > 0)
   1.167 +                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
   1.168 +                else
   1.169 +                    count = (addr & ~PAGE_MASK) / size;
   1.170 +	    } else
   1.171 +		__update_guest_eip(inst_len);
   1.172 +
   1.173 +	    send_pio_req(regs, port, count, size, addr, dir, 1);
   1.174 +	}
   1.175 +    } else {
   1.176          __update_guest_eip(inst_len);
   1.177 -    } else
   1.178 -        __update_guest_eip(inst_len);
   1.179 -
   1.180 -    p->addr = addr;
   1.181 -    p->port_mm = 0;
   1.182 -
   1.183 -    /* Check if the packet needs to be intercepted */
   1.184 -    if (vmx_portio_intercept(p))
   1.185 -	/* no blocking & no evtchn notification */
   1.186 -        return;
   1.187 -
   1.188 -    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
   1.189 -    p->state = STATE_IOREQ_READY;
   1.190 -    evtchn_send(iopacket_port(d->domain));
   1.191 -    vmx_wait_io();
   1.192 +	send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
   1.193 +    }
   1.194  }
   1.195  
   1.196  int
     2.1 --- a/xen/arch/x86/vmx_intercept.c	Fri Sep 02 17:53:52 2005 +0000
     2.2 +++ b/xen/arch/x86/vmx_intercept.c	Fri Sep 02 17:54:34 2005 +0000
     2.3 @@ -172,7 +172,7 @@ int intercept_pit_io(ioreq_t *p)
     2.4  
     2.5      if (p->size != 1 ||
     2.6          p->pdata_valid ||
     2.7 -        p->port_mm)
     2.8 +	p->type != IOREQ_TYPE_PIO)
     2.9          return 0;
    2.10      
    2.11      if (p->addr == PIT_MODE &&
    2.12 @@ -284,7 +284,5 @@ void vmx_hooks_assist(struct vcpu *d)
    2.13          if (!reinit)
    2.14  	    register_portio_handler(0x40, 4, intercept_pit_io); 
    2.15      }
    2.16 -
    2.17  }
    2.18 -
    2.19  #endif /* CONFIG_VMX */
     3.1 --- a/xen/arch/x86/vmx_io.c	Fri Sep 02 17:53:52 2005 +0000
     3.2 +++ b/xen/arch/x86/vmx_io.c	Fri Sep 02 17:54:34 2005 +0000
     3.3 @@ -33,6 +33,7 @@
     3.4  #include <asm/vmx_platform.h>
     3.5  #include <asm/vmx_virpit.h>
     3.6  #include <asm/apic.h>
     3.7 +#include <asm/shadow.h>
     3.8  
     3.9  #include <public/io/ioreq.h>
    3.10  #include <public/io/vmx_vlapic.h>
    3.11 @@ -123,7 +124,6 @@ static void set_reg_value (int size, int
    3.12              regs->esp &= 0xFFFF0000;
    3.13              regs->esp |= (value & 0xFFFF);
    3.14              break;
    3.15 -
    3.16          case 5:
    3.17              regs->ebp &= 0xFFFF0000;
    3.18              regs->ebp |= (value & 0xFFFF);
    3.19 @@ -207,7 +207,6 @@ static inline void __set_reg_value(unsig
    3.20              *reg &= ~0xFFFF;
    3.21              *reg |= (value & 0xFFFF);
    3.22              break;
    3.23 -
    3.24          case LONG:
    3.25              *reg &= ~0xFFFFFFFF;
    3.26              *reg |= (value & 0xFFFFFFFF);
    3.27 @@ -322,13 +321,319 @@ static void set_reg_value (int size, int
    3.28  }
    3.29  #endif
    3.30  
    3.31 +extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
    3.32 +
    3.33 +static inline void set_eflags_CF(int size, unsigned long v1,
    3.34 +	unsigned long v2, struct cpu_user_regs *regs)
    3.35 +{
    3.36 +    unsigned long mask = (1 << (8 * size)) - 1;
    3.37 +
    3.38 +    if ((v1 & mask) > (v2 & mask))
    3.39 +	regs->eflags |= X86_EFLAGS_CF;
    3.40 +    else
    3.41 +	regs->eflags &= ~X86_EFLAGS_CF;
    3.42 +}
    3.43 +
    3.44 +static inline void set_eflags_OF(int size, unsigned long v1,
    3.45 +	unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
    3.46 +{
    3.47 +    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
    3.48 +	regs->eflags |= X86_EFLAGS_OF;
    3.49 +}
    3.50 +
    3.51 +static inline void set_eflags_AF(int size, unsigned long v1,
    3.52 +	unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
    3.53 +{
    3.54 +    if ((v1 ^ v2 ^ v3) & 0x10)
    3.55 +	regs->eflags |= X86_EFLAGS_AF;
    3.56 +}
    3.57 +
    3.58 +static inline void set_eflags_ZF(int size, unsigned long v1,
    3.59 +	struct cpu_user_regs *regs)
    3.60 +{
    3.61 +    unsigned long mask = (1 << (8 * size)) - 1;
    3.62 +
    3.63 +    if ((v1 & mask) == 0)
    3.64 +	regs->eflags |= X86_EFLAGS_ZF;
    3.65 +}
    3.66 +
    3.67 +static inline void set_eflags_SF(int size, unsigned long v1,
    3.68 +	struct cpu_user_regs *regs)
    3.69 +{
    3.70 +    if (v1 & (1 << ((8 * size) - 1)))
    3.71 +	regs->eflags |= X86_EFLAGS_SF;
    3.72 +}
    3.73 +
    3.74 +static char parity_table[256] = {
    3.75 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.76 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.77 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.78 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.79 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.80 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.81 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.82 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.83 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.84 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.85 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.86 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.87 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
    3.88 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.89 +    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
    3.90 +    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
    3.91 +};
    3.92 +
    3.93 +static inline void set_eflags_PF(int size, unsigned long v1,
    3.94 +	struct cpu_user_regs *regs)
    3.95 +{
    3.96 +    if (parity_table[v1 & 0xFF])
    3.97 +	regs->eflags |= X86_EFLAGS_PF;
    3.98 +}
    3.99 +
   3.100 +static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
   3.101 +					struct mi_per_cpu_info *mpcip)
   3.102 +{
   3.103 +    unsigned long old_eax;
   3.104 +    int sign = p->df ? -1 : 1;
   3.105 +
   3.106 +    if (p->dir == IOREQ_WRITE) {
   3.107 +        if (p->pdata_valid) {
   3.108 +            regs->esi += sign * p->count * p->size;
   3.109 +	    if (mpcip->flags & REPZ)
   3.110 +		regs->ecx -= p->count;
   3.111 +        }
   3.112 +    } else {
   3.113 +	if (mpcip->flags & OVERLAP) {
   3.114 +	    unsigned long addr;
   3.115 +
   3.116 +            regs->edi += sign * p->count * p->size;
   3.117 +	    if (mpcip->flags & REPZ)
   3.118 +		regs->ecx -= p->count;
   3.119 +
   3.120 +	    addr = regs->edi;
   3.121 +	    if (sign > 0)
   3.122 +		addr -= p->size;
   3.123 +	    vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
   3.124 +	} else if (p->pdata_valid) {
   3.125 +            regs->edi += sign * p->count * p->size;
   3.126 +	    if (mpcip->flags & REPZ)
   3.127 +		regs->ecx -= p->count;
   3.128 +        } else {
   3.129 +	    old_eax = regs->eax;
   3.130 +	    switch (p->size) {
   3.131 +            case 1:
   3.132 +                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
   3.133 +                break;
   3.134 +            case 2:
   3.135 +                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
   3.136 +                break;
   3.137 +            case 4:
   3.138 +                regs->eax = (p->u.data & 0xffffffff);
   3.139 +                break;
   3.140 +            default:
   3.141 +		printk("Error: %s unknown port size\n", __FUNCTION__);
   3.142 +		domain_crash_synchronous();
   3.143 +	    }
   3.144 +    	}
   3.145 +    }
   3.146 +}
   3.147 +
   3.148 +static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
   3.149 +					struct mi_per_cpu_info *mpcip)
   3.150 +{
   3.151 +    int sign = p->df ? -1 : 1;
   3.152 +    int size = -1, index = -1;
   3.153 +    unsigned long value = 0, diff = 0;
   3.154 +    unsigned long src, dst;
   3.155 +
   3.156 +    src = mpcip->operand[0];
   3.157 +    dst = mpcip->operand[1];
   3.158 +    size = operand_size(src);
   3.159 +
   3.160 +    switch (mpcip->instr) {
   3.161 +    case INSTR_MOV:
   3.162 +	if (dst & REGISTER) {
   3.163 +	    index = operand_index(dst);
   3.164 +	    set_reg_value(size, index, 0, regs, p->u.data);
   3.165 +	}
   3.166 +	break;
   3.167 +
   3.168 +    case INSTR_MOVZ:
   3.169 +	if (dst & REGISTER) {
   3.170 +	    index = operand_index(dst);
   3.171 +	    switch (size) {
   3.172 +	    case BYTE: p->u.data = p->u.data & 0xFFULL; break;
   3.173 +	    case WORD: p->u.data = p->u.data & 0xFFFFULL; break;
   3.174 +	    case LONG: p->u.data = p->u.data & 0xFFFFFFFFULL; break;
   3.175 +	    }
   3.176 +	    set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
   3.177 +	}
   3.178 +	break;
   3.179 +
   3.180 +    case INSTR_MOVS:
   3.181 +	sign = p->df ? -1 : 1;
   3.182 +	regs->esi += sign * p->count * p->size;
   3.183 +	regs->edi += sign * p->count * p->size;
   3.184 +
   3.185 +	if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) {
   3.186 +	    unsigned long addr = regs->edi;
   3.187 +
   3.188 +	    if (sign > 0)
   3.189 +		addr -= p->size;
   3.190 +	    vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
   3.191 +	}
   3.192 +
   3.193 +	if (mpcip->flags & REPZ)
   3.194 +	    regs->ecx -= p->count;
   3.195 +	break;
   3.196 +
   3.197 +    case INSTR_STOS:
   3.198 +	sign = p->df ? -1 : 1;
   3.199 +	regs->edi += sign * p->count * p->size;
   3.200 +	if (mpcip->flags & REPZ)
   3.201 +	    regs->ecx -= p->count;
   3.202 +	break;
   3.203 +
   3.204 +    case INSTR_AND:
   3.205 +	if (src & REGISTER) {
   3.206 +	    index = operand_index(src);
   3.207 +	    value = get_reg_value(size, index, 0, regs);
   3.208 +	    diff = (unsigned long) p->u.data & value;
   3.209 +	} else if (src & IMMEDIATE) {
   3.210 +	    value = mpcip->immediate;
   3.211 +	    diff = (unsigned long) p->u.data & value;
   3.212 +	} else if (src & MEMORY) {
   3.213 +	    index = operand_index(dst);
   3.214 +	    value = get_reg_value(size, index, 0, regs);
   3.215 +	    diff = (unsigned long) p->u.data & value;
   3.216 +	    set_reg_value(size, index, 0, regs, diff);
   3.217 +	}
   3.218 +
   3.219 +	/*
   3.220 +	 * The OF and CF flags are cleared; the SF, ZF, and PF
   3.221 +	 * flags are set according to the result. The state of
   3.222 +	 * the AF flag is undefined.
   3.223 +	 */
   3.224 +	regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
   3.225 +			  X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
   3.226 +	set_eflags_ZF(size, diff, regs);
   3.227 +	set_eflags_SF(size, diff, regs);
   3.228 +	set_eflags_PF(size, diff, regs);
   3.229 +	break;
   3.230 +
   3.231 +    case INSTR_OR:
   3.232 +	if (src & REGISTER) {
   3.233 +	    index = operand_index(src);
   3.234 +	    value = get_reg_value(size, index, 0, regs);
   3.235 +	    diff = (unsigned long) p->u.data | value;
   3.236 +	} else if (src & IMMEDIATE) {
   3.237 +	    value = mpcip->immediate;
   3.238 +	    diff = (unsigned long) p->u.data | value;
   3.239 +	} else if (src & MEMORY) {
   3.240 +	    index = operand_index(dst);
   3.241 +	    value = get_reg_value(size, index, 0, regs);
   3.242 +	    diff = (unsigned long) p->u.data | value;
   3.243 +	    set_reg_value(size, index, 0, regs, diff);
   3.244 +	}
   3.245 +
   3.246 +	/*
   3.247 +	 * The OF and CF flags are cleared; the SF, ZF, and PF
   3.248 +	 * flags are set according to the result. The state of
   3.249 +	 * the AF flag is undefined.
   3.250 +	 */
   3.251 +	regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
   3.252 +			  X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
   3.253 +	set_eflags_ZF(size, diff, regs);
   3.254 +	set_eflags_SF(size, diff, regs);
   3.255 +	set_eflags_PF(size, diff, regs);
   3.256 +	break;
   3.257 +
   3.258 +    case INSTR_XOR:
   3.259 +	if (src & REGISTER) {
   3.260 +	    index = operand_index(src);
   3.261 +	    value = get_reg_value(size, index, 0, regs);
   3.262 +	    diff = (unsigned long) p->u.data ^ value;
   3.263 +	} else if (src & IMMEDIATE) {
   3.264 +	    value = mpcip->immediate;
   3.265 +	    diff = (unsigned long) p->u.data ^ value;
   3.266 +	} else if (src & MEMORY) {
   3.267 +	    index = operand_index(dst);
   3.268 +	    value = get_reg_value(size, index, 0, regs);
   3.269 +	    diff = (unsigned long) p->u.data ^ value;
   3.270 +	    set_reg_value(size, index, 0, regs, diff);
   3.271 +	}
   3.272 +
   3.273 +	/*
   3.274 +	 * The OF and CF flags are cleared; the SF, ZF, and PF
   3.275 +	 * flags are set according to the result. The state of
   3.276 +	 * the AF flag is undefined.
   3.277 +	 */
   3.278 +	regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
   3.279 +			  X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
   3.280 +	set_eflags_ZF(size, diff, regs);
   3.281 +	set_eflags_SF(size, diff, regs);
   3.282 +	set_eflags_PF(size, diff, regs);
   3.283 +	break;
   3.284 +
   3.285 +    case INSTR_CMP:
   3.286 +	if (src & REGISTER) {
   3.287 +	    index = operand_index(src);
   3.288 +	    value = get_reg_value(size, index, 0, regs);
   3.289 +	    diff = (unsigned long) p->u.data - value;
   3.290 +	} else if (src & IMMEDIATE) {
   3.291 +	    value = mpcip->immediate;
   3.292 +	    diff = (unsigned long) p->u.data - value;
   3.293 +	} else if (src & MEMORY) {
   3.294 +	    index = operand_index(dst);
   3.295 +	    value = get_reg_value(size, index, 0, regs);
   3.296 +	    diff = value - (unsigned long) p->u.data;
   3.297 +	}
   3.298 +
   3.299 +	/*
   3.300 +	 * The CF, OF, SF, ZF, AF, and PF flags are set according
   3.301 +	 * to the result
   3.302 +	 */
   3.303 +	regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
   3.304 +			  X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
   3.305 +	set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
   3.306 +	set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
   3.307 +	set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
   3.308 +	set_eflags_ZF(size, diff, regs);
   3.309 +	set_eflags_SF(size, diff, regs);
   3.310 +	set_eflags_PF(size, diff, regs);
   3.311 +	break;
   3.312 +
   3.313 +    case INSTR_TEST:
   3.314 +	if (src & REGISTER) {
   3.315 +	    index = operand_index(src);
   3.316 +	    value = get_reg_value(size, index, 0, regs);
   3.317 +	} else if (src & IMMEDIATE) {
   3.318 +	    value = mpcip->immediate;
   3.319 +	} else if (src & MEMORY) {
   3.320 +	    index = operand_index(dst);
   3.321 +	    value = get_reg_value(size, index, 0, regs);
   3.322 +	}
   3.323 +	diff = (unsigned long) p->u.data & value;
   3.324 +
   3.325 +	/*
   3.326 +	 * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
   3.327 +	 */
   3.328 +	regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
   3.329 +			  X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
   3.330 +	set_eflags_ZF(size, diff, regs);
   3.331 +	set_eflags_SF(size, diff, regs);
   3.332 +	set_eflags_PF(size, diff, regs);
   3.333 +	break;
   3.334 +    }
   3.335 +
   3.336 +    load_cpu_user_regs(regs);
   3.337 +}
   3.338 +
   3.339  void vmx_io_assist(struct vcpu *v) 
   3.340  {
   3.341      vcpu_iodata_t *vio;
   3.342      ioreq_t *p;
   3.343      struct cpu_user_regs *regs = guest_cpu_user_regs();
   3.344 -    unsigned long old_eax;
   3.345 -    int sign;
   3.346      struct mi_per_cpu_info *mpci_p;
   3.347      struct cpu_user_regs *inst_decoder_regs;
   3.348  
   3.349 @@ -340,80 +645,26 @@ void vmx_io_assist(struct vcpu *v)
   3.350      if (vio == 0) {
   3.351          VMX_DBG_LOG(DBG_LEVEL_1, 
   3.352                      "bad shared page: %lx", (unsigned long) vio);
   3.353 +	printf("bad shared page: %lx\n", (unsigned long) vio);
   3.354          domain_crash_synchronous();
   3.355      }
   3.356 -    p = &vio->vp_ioreq;
   3.357  
   3.358 -    if (p->state == STATE_IORESP_HOOK){
   3.359 +    p = &vio->vp_ioreq;
   3.360 +    if (p->state == STATE_IORESP_HOOK)
   3.361          vmx_hooks_assist(v);
   3.362 -    }
   3.363  
   3.364      /* clear IO wait VMX flag */
   3.365      if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
   3.366 -        if (p->state != STATE_IORESP_READY) {
   3.367 -                /* An interrupt send event raced us */
   3.368 -                return;
   3.369 -        } else {
   3.370 -            p->state = STATE_INVALID;
   3.371 -        }
   3.372 -        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
   3.373 -    } else {
   3.374 -        return;
   3.375 -    }
   3.376 -
   3.377 -    sign = (p->df) ? -1 : 1;
   3.378 -    if (p->port_mm) {
   3.379 -        if (p->pdata_valid) {
   3.380 -            regs->esi += sign * p->count * p->size;
   3.381 -            regs->edi += sign * p->count * p->size;
   3.382 -        } else {
   3.383 -            if (p->dir == IOREQ_WRITE) {
   3.384 -                return;
   3.385 -            }
   3.386 -            int size = -1, index = -1;
   3.387 -
   3.388 -            size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target);
   3.389 -            index = operand_index(v->domain->arch.vmx_platform.mpci.mmio_target);
   3.390 -
   3.391 -            if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
   3.392 -                p->u.data = p->u.data & 0xffff;
   3.393 -            }        
   3.394 -            set_reg_value(size, index, 0, regs, p->u.data);
   3.395 +        if (p->state == STATE_IORESP_READY) {
   3.396 +	    p->state = STATE_INVALID;
   3.397 +            clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
   3.398  
   3.399 -        }
   3.400 -        load_cpu_user_regs(regs);
   3.401 -        return;
   3.402 -    }
   3.403 -
   3.404 -    if (p->dir == IOREQ_WRITE) {
   3.405 -        if (p->pdata_valid) {
   3.406 -            regs->esi += sign * p->count * p->size;
   3.407 -            regs->ecx -= p->count;
   3.408 -        }
   3.409 -        return;
   3.410 -    } else {
   3.411 -        if (p->pdata_valid) {
   3.412 -            regs->edi += sign * p->count * p->size;
   3.413 -            regs->ecx -= p->count;
   3.414 -            return;
   3.415 -        }
   3.416 -    }
   3.417 -
   3.418 -    old_eax = regs->eax;
   3.419 -
   3.420 -    switch(p->size) {
   3.421 -    case 1:
   3.422 -        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
   3.423 -        break;
   3.424 -    case 2:
   3.425 -        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
   3.426 -        break;
   3.427 -    case 4:
   3.428 -        regs->eax = (p->u.data & 0xffffffff);
   3.429 -        break;
   3.430 -    default:
   3.431 -        printk("Error: %s unknwon port size\n", __FUNCTION__);
   3.432 -        domain_crash_synchronous();
   3.433 +	    if (p->type == IOREQ_TYPE_PIO)
   3.434 +		vmx_pio_assist(regs, p, mpci_p);
   3.435 +	    else
   3.436 +		vmx_mmio_assist(regs, p, mpci_p);
   3.437 +	}
   3.438 +	/* else an interrupt send event raced us */
   3.439      }
   3.440  }
   3.441  
   3.442 @@ -456,8 +707,9 @@ void vmx_wait_io()
   3.443      int port = iopacket_port(current->domain);
   3.444  
   3.445      do {
   3.446 -        if(!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
   3.447 +        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
   3.448              do_block();
   3.449 +
   3.450          vmx_check_events(current);
   3.451          if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
   3.452              break;
     4.1 --- a/xen/arch/x86/vmx_platform.c	Fri Sep 02 17:53:52 2005 +0000
     4.2 +++ b/xen/arch/x86/vmx_platform.c	Fri Sep 02 17:54:34 2005 +0000
     4.3 @@ -64,37 +64,37 @@ static inline long __get_reg_value(unsig
     4.4          case QUAD:
     4.5              return (long)(reg);
     4.6          default:
     4.7 -            printk("Error: <__get_reg_value>Invalid reg size\n");
     4.8 +	printf("Error: (__get_reg_value) Invalid reg size\n");
     4.9              domain_crash_synchronous();
    4.10      }
    4.11  }
    4.12  
    4.13 -static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) 
    4.14 +long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) 
    4.15  {
    4.16      if (size == BYTE) {
    4.17          switch (index) { 
    4.18 -            case 0: //%al
    4.19 +	case 0: /* %al */
    4.20                  return (char)(regs->rax & 0xFF);
    4.21 -            case 1: //%cl  
    4.22 +	case 1: /* %cl */
    4.23                  return (char)(regs->rcx & 0xFF);
    4.24 -            case 2: //%dl
    4.25 +	case 2: /* %dl */
    4.26                  return (char)(regs->rdx & 0xFF); 
    4.27 -            case 3: //%bl
    4.28 +	case 3: /* %bl */
    4.29                  return (char)(regs->rbx & 0xFF);
    4.30 -            case 4: //%ah
    4.31 +	case 4: /* %ah */
    4.32                  return (char)((regs->rax & 0xFF00) >> 8);
    4.33 -            case 5: //%ch 
    4.34 +	case 5: /* %ch */
    4.35                  return (char)((regs->rcx & 0xFF00) >> 8);
    4.36 -            case 6: //%dh
    4.37 +	case 6: /* %dh */
    4.38                  return (char)((regs->rdx & 0xFF00) >> 8);
    4.39 -            case 7: //%bh
    4.40 +	case 7: /* %bh */
    4.41                  return (char)((regs->rbx & 0xFF00) >> 8);
    4.42              default:
    4.43 -                printk("Error: (get_reg_value)Invalid index value\n"); 
    4.44 +	    printf("Error: (get_reg_value) Invalid index value\n"); 
    4.45                  domain_crash_synchronous();
    4.46          }
    4.47 +    }
    4.48  
    4.49 -    }
    4.50      switch (index) {
    4.51          case 0: return __get_reg_value(regs->rax, size);
    4.52          case 1: return __get_reg_value(regs->rcx, size);
    4.53 @@ -113,7 +113,7 @@ static long get_reg_value(int size, int 
    4.54          case 14: return __get_reg_value(regs->r14, size);
    4.55          case 15: return __get_reg_value(regs->r15, size);
    4.56          default:
    4.57 -            printk("Error: (get_reg_value)Invalid index value\n"); 
    4.58 +	printf("Error: (get_reg_value) Invalid index value\n"); 
    4.59              domain_crash_synchronous();
    4.60      }
    4.61  }
    4.62 @@ -129,117 +129,91 @@ void store_cpu_user_regs(struct cpu_user
    4.63      __vmread(GUEST_RIP, &regs->eip);
    4.64  }
    4.65  
    4.66 -static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
    4.67 +static inline long __get_reg_value(unsigned long reg, int size)
    4.68  {                    
    4.69 -    /*               
    4.70 -     * Reference the db_reg[] table
    4.71 -     */              
    4.72 -    switch (size) {  
    4.73 -    case BYTE: 
    4.74 +    switch(size) {
    4.75 +    case WORD:
    4.76 +	return (short)(reg & 0xFFFF);
    4.77 +    case LONG:
    4.78 +	return (int)(reg & 0xFFFFFFFF);
    4.79 +    default:
    4.80 +	printf("Error: (__get_reg_value) Invalid reg size\n");
    4.81 +	domain_crash_synchronous();
    4.82 +    }
    4.83 +}
    4.84 +
    4.85 +long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
    4.86 +{                    
    4.87 +    if (size == BYTE) {
    4.88          switch (index) { 
    4.89 -        case 0: //%al
    4.90 +	case 0: /* %al */
    4.91              return (char)(regs->eax & 0xFF);
    4.92 -        case 1: //%cl  
    4.93 +	case 1: /* %cl */
    4.94              return (char)(regs->ecx & 0xFF);
    4.95 -        case 2: //%dl
    4.96 +	case 2: /* %dl */
    4.97              return (char)(regs->edx & 0xFF); 
    4.98 -        case 3: //%bl
    4.99 +	case 3: /* %bl */
   4.100              return (char)(regs->ebx & 0xFF);
   4.101 -        case 4: //%ah
   4.102 +	case 4: /* %ah */
   4.103              return (char)((regs->eax & 0xFF00) >> 8);
   4.104 -        case 5: //%ch 
   4.105 +	case 5: /* %ch */
   4.106              return (char)((regs->ecx & 0xFF00) >> 8);
   4.107 -        case 6: //%dh
   4.108 +	case 6: /* %dh */
   4.109              return (char)((regs->edx & 0xFF00) >> 8);
   4.110 -        case 7: //%bh
   4.111 +	case 7: /* %bh */
   4.112              return (char)((regs->ebx & 0xFF00) >> 8);
   4.113          default:
   4.114 -            printk("Error: (get_reg_value)size case 0 error\n"); 
   4.115 +	    printf("Error: (get_reg_value) Invalid index value\n"); 
   4.116              domain_crash_synchronous();
   4.117          }
   4.118 -    case WORD:
   4.119 -        switch (index) {
   4.120 -        case 0: //%ax
   4.121 -            return (short)(regs->eax & 0xFFFF);
   4.122 -        case 1: //%cx
   4.123 -            return (short)(regs->ecx & 0xFFFF);
   4.124 -        case 2: //%dx
   4.125 -            return (short)(regs->edx & 0xFFFF);
   4.126 -        case 3: //%bx
   4.127 -            return (short)(regs->ebx & 0xFFFF);
   4.128 -        case 4: //%sp
   4.129 -            return (short)(regs->esp & 0xFFFF);
   4.130 -            break;
   4.131 -        case 5: //%bp
   4.132 -            return (short)(regs->ebp & 0xFFFF);
   4.133 -        case 6: //%si
   4.134 -            return (short)(regs->esi & 0xFFFF);
   4.135 -        case 7: //%di
   4.136 -            return (short)(regs->edi & 0xFFFF);
   4.137 -        default:
   4.138 -            printk("Error: (get_reg_value)size case 1 error\n");
   4.139 -            domain_crash_synchronous();
   4.140          }
   4.141 -    case LONG:
   4.142 -        switch (index) {
   4.143 -        case 0: //%eax
   4.144 -            return regs->eax;
   4.145 -        case 1: //%ecx
   4.146 -            return regs->ecx;
   4.147 -        case 2: //%edx
   4.148 -            return regs->edx;
   4.149  
   4.150 -        case 3: //%ebx
   4.151 -            return regs->ebx;
   4.152 -        case 4: //%esp
   4.153 -            return regs->esp;
   4.154 -        case 5: //%ebp
   4.155 -            return regs->ebp;
   4.156 -        case 6: //%esi
   4.157 -            return regs->esi;
   4.158 -        case 7: //%edi
   4.159 -            return regs->edi;
   4.160 -        default:
   4.161 -            printk("Error: (get_reg_value)size case 2 error\n");
   4.162 -            domain_crash_synchronous();
   4.163 -        }
   4.164 +        switch (index) {
   4.165 +    case 0: return __get_reg_value(regs->eax, size);
   4.166 +    case 1: return __get_reg_value(regs->ecx, size);
   4.167 +    case 2: return __get_reg_value(regs->edx, size);
   4.168 +    case 3: return __get_reg_value(regs->ebx, size);
   4.169 +    case 4: return __get_reg_value(regs->esp, size);
   4.170 +    case 5: return __get_reg_value(regs->ebp, size);
   4.171 +    case 6: return __get_reg_value(regs->esi, size);
   4.172 +    case 7: return __get_reg_value(regs->edi, size);
   4.173      default:
   4.174 -        printk("Error: (get_reg_value)size case error\n");
   4.175 +	printf("Error: (get_reg_value) Invalid index value\n"); 
   4.176          domain_crash_synchronous();
   4.177      }
   4.178  }
   4.179  #endif
   4.180  
   4.181 -static inline const unsigned char *check_prefix(const unsigned char *inst, struct instruction *thread_inst, unsigned char *rex_p)
   4.182 +static inline unsigned char *check_prefix(unsigned char *inst,
   4.183 +		struct instruction *thread_inst, unsigned char *rex_p)
   4.184  {
   4.185      while (1) {
   4.186          switch (*inst) {
   4.187 -            /* rex prefix for em64t instructions*/
   4.188 +        /* rex prefix for em64t instructions */
   4.189              case 0x40 ... 0x4e:
   4.190                  *rex_p = *inst;
   4.191                  break;
   4.192 -
   4.193 -            case 0xf3: //REPZ
   4.194 +        case 0xf3: /* REPZ */
   4.195      	    	thread_inst->flags = REPZ;
   4.196 -	        	break;
   4.197 -            case 0xf2: //REPNZ
   4.198 +        	break;
   4.199 +        case 0xf2: /* REPNZ */
   4.200      	    	thread_inst->flags = REPNZ;
   4.201 -	        	break;
   4.202 -            case 0xf0: //LOCK
   4.203 +        	break;
   4.204 +        case 0xf0: /* LOCK */
   4.205      	    	break;
   4.206 -            case 0x2e: //CS
   4.207 -            case 0x36: //SS
   4.208 -            case 0x3e: //DS
   4.209 -            case 0x26: //ES
   4.210 -            case 0x64: //FS
   4.211 -            case 0x65: //GS
   4.212 -		        thread_inst->seg_sel = *inst;
   4.213 +        case 0x2e: /* CS */
   4.214 +        case 0x36: /* SS */
   4.215 +        case 0x3e: /* DS */
   4.216 +        case 0x26: /* ES */
   4.217 +        case 0x64: /* FS */
   4.218 +        case 0x65: /* GS */
   4.219 +	        thread_inst->seg_sel = *inst;
   4.220                  break;
   4.221 -            case 0x66: //32bit->16bit
   4.222 +        case 0x66: /* 32bit->16bit */
   4.223                  thread_inst->op_size = WORD;
   4.224                  break;
   4.225              case 0x67:
   4.226 -	        	printf("Error: Not handling 0x67 (yet)\n");
   4.227 +        	printf("Error: Not handling 0x67 (yet)\n");
   4.228                  domain_crash_synchronous();
   4.229                  break;
   4.230              default:
   4.231 @@ -249,7 +223,7 @@ static inline const unsigned char *check
   4.232      }
   4.233  }
   4.234  
   4.235 -static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
   4.236 +static inline unsigned long get_immediate(int op16,const unsigned char *inst, int op_size)
   4.237  {
   4.238      int mod, reg, rm;
   4.239      unsigned long val = 0;
   4.240 @@ -317,197 +291,299 @@ static inline int get_index(const unsign
   4.241  
   4.242  static void init_instruction(struct instruction *mmio_inst)
   4.243  {
   4.244 -    memset(mmio_inst->i_name, '0', I_NAME_LEN);
   4.245 -    mmio_inst->op_size =  0;
   4.246 -    mmio_inst->offset = 0;
   4.247 +    mmio_inst->instr = 0;
   4.248 +    mmio_inst->op_size = 0;
   4.249      mmio_inst->immediate = 0;
   4.250      mmio_inst->seg_sel = 0;
   4.251 -    mmio_inst->op_num = 0;
   4.252  
   4.253      mmio_inst->operand[0] = 0;
   4.254      mmio_inst->operand[1] = 0;
   4.255 -    mmio_inst->operand[2] = 0;
   4.256          
   4.257      mmio_inst->flags = 0;
   4.258  }
   4.259  
   4.260  #define GET_OP_SIZE_FOR_BYTE(op_size)   \
   4.261 -    do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0)
   4.262 +    do {				\
   4.263 +    	if (rex)			\
   4.264 +	    op_size = BYTE_64;		\
   4.265 +	else				\
   4.266 +	    op_size = BYTE;		\
   4.267 +    } while(0)
   4.268  
   4.269  #define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
   4.270 -    do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = LONG;} while(0)
   4.271 +    do {				\
   4.272 +    	if (rex & 0x8)			\
   4.273 +	    op_size = QUAD;		\
   4.274 +	else if (op_size != WORD)	\
   4.275 +	    op_size = LONG;		\
   4.276 +    } while(0)
   4.277  
   4.278 -static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst)
   4.279 +
   4.280 +/*
   4.281 + * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
   4.282 + */
   4.283 +static int mem_acc(unsigned char size, struct instruction *instr)
   4.284 +{
   4.285 +    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
   4.286 +    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
   4.287 +    return DECODE_success;
   4.288 +}
   4.289 +
   4.290 +/*
   4.291 + * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
   4.292 + */
   4.293 +static int acc_mem(unsigned char size, struct instruction *instr)
   4.294 +{
   4.295 +    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
   4.296 +    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
   4.297 +    return DECODE_success;
   4.298 +}
   4.299 +
   4.300 +/*
   4.301 + * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
   4.302 + */
   4.303 +static int mem_reg(unsigned char size, unsigned char *opcode,
   4.304 +			struct instruction *instr, unsigned char rex)
   4.305 +{
   4.306 +    int index = get_index(opcode + 1, rex);
   4.307 +
   4.308 +    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
   4.309 +    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
   4.310 +    return DECODE_success;
   4.311 +}
   4.312 +
   4.313 +/*
   4.314 + * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
   4.315 + */
   4.316 +static int reg_mem(unsigned char size, unsigned char *opcode,
   4.317 +			struct instruction *instr, unsigned char rex)
   4.318 +{
   4.319 +    int index = get_index(opcode + 1, rex);
   4.320 +
   4.321 +    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
   4.322 +    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
   4.323 +    return DECODE_success;
   4.324 +}
   4.325 +
   4.326 +static int vmx_decode(unsigned char *opcode, struct instruction *instr)
   4.327  {
   4.328      unsigned long eflags;
   4.329      int index, vm86 = 0;
   4.330      unsigned char rex = 0;
   4.331      unsigned char tmp_size = 0;
   4.332  
   4.333 +    init_instruction(instr);
   4.334  
   4.335 -    init_instruction(thread_inst);
   4.336 -
   4.337 -    inst = check_prefix(inst, thread_inst, &rex);
   4.338 +    opcode = check_prefix(opcode, instr, &rex);
   4.339  
   4.340      __vmread(GUEST_RFLAGS, &eflags);
   4.341      if (eflags & X86_EFLAGS_VM)
   4.342          vm86 = 1;
   4.343  
   4.344      if (vm86) { /* meaning is reversed */
   4.345 -       if (thread_inst->op_size == WORD)
   4.346 -           thread_inst->op_size = LONG;
   4.347 -       else if (thread_inst->op_size == LONG)
   4.348 -           thread_inst->op_size = WORD;
   4.349 -       else if (thread_inst->op_size == 0)
   4.350 -           thread_inst->op_size = WORD;
   4.351 +       if (instr->op_size == WORD)
   4.352 +           instr->op_size = LONG;
   4.353 +       else if (instr->op_size == LONG)
   4.354 +           instr->op_size = WORD;
   4.355 +       else if (instr->op_size == 0)
   4.356 +           instr->op_size = WORD;
   4.357      }
   4.358  
   4.359 -    switch(*inst) {
   4.360 -        case 0x81:
   4.361 -            /* This is only a workaround for cmpl instruction*/
   4.362 -            strcpy((char *)thread_inst->i_name, "cmp");
   4.363 -            return DECODE_success;
   4.364 -
   4.365 -        case 0x88:
   4.366 -            /* mov r8 to m8 */
   4.367 -            thread_inst->op_size = BYTE;
   4.368 -            index = get_index((inst + 1), rex);
   4.369 -            GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.370 -            thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER);
   4.371 -
   4.372 -            break;
   4.373 -        case 0x89:
   4.374 -            /* mov r32/16 to m32/16 */
   4.375 -            index = get_index((inst + 1), rex);
   4.376 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.377 -            thread_inst->operand[0] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
   4.378 -
   4.379 -            break;
   4.380 -        case 0x8a:
   4.381 -            /* mov m8 to r8 */
   4.382 -            thread_inst->op_size = BYTE;
   4.383 -            index = get_index((inst + 1), rex);
   4.384 -            GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.385 -            thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER);
   4.386 -            break;
   4.387 -        case 0x8b:
   4.388 -            /* mov r32/16 to m32/16 */
   4.389 -            index = get_index((inst + 1), rex);
   4.390 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.391 -            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
   4.392 -            break;
   4.393 -        case 0x8c:
   4.394 -        case 0x8e:
   4.395 -            printk("%x, This opcode hasn't been handled yet!", *inst);
   4.396 -            return DECODE_failure;
   4.397 -            /* Not handle it yet. */
   4.398 -        case 0xa0:
   4.399 -            /* mov byte to al */
   4.400 -            thread_inst->op_size = BYTE;
   4.401 -            GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.402 -            thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER);
   4.403 -            break;
   4.404 -        case 0xa1:
   4.405 -            /* mov word/doubleword to ax/eax */
   4.406 -	    GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.407 -	    thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
   4.408 +    switch (*opcode) {
   4.409 +    case 0x0B: /* or m32/16, r32/16 */
   4.410 +	instr->instr = INSTR_OR;
   4.411 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.412 +	return mem_reg(instr->op_size, opcode, instr, rex);
   4.413  
   4.414 -            break;
   4.415 -        case 0xa2:
   4.416 -            /* mov al to (seg:offset) */
   4.417 -            thread_inst->op_size = BYTE;
   4.418 -            GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.419 -            thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER);
   4.420 -            break;
   4.421 -        case 0xa3:
   4.422 -            /* mov ax/eax to (seg:offset) */
   4.423 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.424 -            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, REGISTER);
   4.425 -            break;
   4.426 -        case 0xa4:
   4.427 -            /* movsb */
   4.428 -            thread_inst->op_size = BYTE;
   4.429 -            strcpy((char *)thread_inst->i_name, "movs");
   4.430 -            return DECODE_success;
   4.431 -        case 0xa5:
   4.432 -            /* movsw/movsl */
   4.433 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.434 -	    strcpy((char *)thread_inst->i_name, "movs");
   4.435 -            return DECODE_success;
   4.436 -        case 0xaa:
   4.437 -            /* stosb */
   4.438 -            thread_inst->op_size = BYTE;
   4.439 -            strcpy((char *)thread_inst->i_name, "stosb");
   4.440 +    case 0x20: /* and r8, m8 */
   4.441 +	instr->instr = INSTR_AND;
   4.442 +	GET_OP_SIZE_FOR_BYTE(instr->op_size);
   4.443 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.444 +
   4.445 +    case 0x21: /* and r32/16, m32/16 */
   4.446 +	instr->instr = INSTR_AND;
   4.447 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.448 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.449 +
   4.450 +    case 0x23: /* and m32/16, r32/16 */
   4.451 +	instr->instr = INSTR_AND;
   4.452 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.453 +	return mem_reg(instr->op_size, opcode, instr, rex);
   4.454 +
   4.455 +    case 0x30: /* xor r8, m8 */
   4.456 +	instr->instr = INSTR_XOR;
   4.457 +	GET_OP_SIZE_FOR_BYTE(instr->op_size);
   4.458 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.459 +
   4.460 +    case 0x31: /* xor r32/16, m32/16 */
   4.461 +	instr->instr = INSTR_XOR;
   4.462 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.463 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.464 +
   4.465 +    case 0x39: /* cmp r32/16, m32/16 */
   4.466 +	instr->instr = INSTR_CMP;
   4.467 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.468 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.469 +
   4.470 +    case 0x81:
   4.471 +	if (((opcode[1] >> 3) & 7) == 7) { /* cmp $imm, m32/16 */
   4.472 +	    instr->instr = INSTR_CMP;
   4.473 +	    GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.474 +
   4.475 +	    instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
   4.476 +	    instr->immediate = get_immediate(vm86, opcode+1, BYTE);
   4.477 +	    instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
   4.478 +
   4.479              return DECODE_success;
   4.480 -       case 0xab:
   4.481 -            /* stosw/stosl */
   4.482 -            if (thread_inst->op_size == WORD) {
   4.483 -                strcpy((char *)thread_inst->i_name, "stosw");
   4.484 -            } else {
   4.485 -                thread_inst->op_size = LONG;
   4.486 -                strcpy((char *)thread_inst->i_name, "stosl");
   4.487 -            }
   4.488 +	} else
   4.489 +	    return DECODE_failure;
   4.490 +
   4.491 +    case 0x84:  /* test m8, r8 */
   4.492 +	instr->instr = INSTR_TEST;
   4.493 +	instr->op_size = BYTE;
   4.494 +	GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.495 +	return mem_reg(tmp_size, opcode, instr, rex);
   4.496 +
   4.497 +    case 0x88: /* mov r8, m8 */
   4.498 +	instr->instr = INSTR_MOV;
   4.499 +	instr->op_size = BYTE;
   4.500 +        GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.501 +	return reg_mem(tmp_size, opcode, instr, rex);
   4.502 +
   4.503 +    case 0x89: /* mov r32/16, m32/16 */
   4.504 +	instr->instr = INSTR_MOV;
   4.505 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.506 +	return reg_mem(instr->op_size, opcode, instr, rex);
   4.507 +
   4.508 +    case 0x8A: /* mov m8, r8 */
   4.509 +	instr->instr = INSTR_MOV;
   4.510 +	instr->op_size = BYTE;
   4.511 +        GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.512 +	return mem_reg(tmp_size, opcode, instr, rex);
   4.513 +
   4.514 +    case 0x8B: /* mov m32/16, r32/16 */
   4.515 +	instr->instr = INSTR_MOV;
   4.516 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.517 +	return mem_reg(instr->op_size, opcode, instr, rex);
   4.518 +
   4.519 +    case 0xA0: /* mov <addr>, al */
   4.520 +	instr->instr = INSTR_MOV;
   4.521 +	instr->op_size = BYTE;
   4.522 +        GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.523 +	return mem_acc(tmp_size, instr);
   4.524 +
   4.525 +    case 0xA1: /* mov <addr>, ax/eax */
   4.526 +	instr->instr = INSTR_MOV;
   4.527 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.528 +	return mem_acc(instr->op_size, instr);
   4.529 +
   4.530 +    case 0xA2: /* mov al, <addr> */
   4.531 +	instr->instr = INSTR_MOV;
   4.532 +	instr->op_size = BYTE;
   4.533 +        GET_OP_SIZE_FOR_BYTE(tmp_size);
   4.534 +	return acc_mem(tmp_size, instr);
   4.535 +
   4.536 +    case 0xA3: /* mov ax/eax, <addr> */
   4.537 +	instr->instr = INSTR_MOV;
   4.538 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.539 +	return acc_mem(instr->op_size, instr);
   4.540 +
   4.541 +    case 0xA4: /* movsb */
   4.542 +	instr->instr = INSTR_MOVS;
   4.543 +	instr->op_size = BYTE;
   4.544 +        return DECODE_success;
   4.545 +            
   4.546 +    case 0xA5: /* movsw/movsl */
   4.547 +	instr->instr = INSTR_MOVS;
   4.548 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.549 +	return DECODE_success;
   4.550 +    
   4.551 +    case 0xAA: /* stosb */
   4.552 +	instr->instr = INSTR_STOS;
   4.553 +	instr->op_size = BYTE;
   4.554 +        return DECODE_success;
   4.555 +
   4.556 +    case 0xAB: /* stosw/stosl */
   4.557 +	instr->instr = INSTR_STOS;
   4.558 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.559 +	return DECODE_success;
   4.560 +                    
   4.561 +    case 0xC6:
   4.562 +	if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */
   4.563 +	    instr->instr = INSTR_MOV;
   4.564 +	    instr->op_size = BYTE;
   4.565 +
   4.566 +	    instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
   4.567 +	    instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
   4.568 +	    instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
   4.569 +            
   4.570              return DECODE_success;
   4.571 -        case 0xc6:
   4.572 -            /* mov imm8 to m8 */
   4.573 -            thread_inst->op_size = BYTE;
   4.574 -            thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
   4.575 -            thread_inst->immediate = get_immediate(vm86,
   4.576 -					(inst+1), thread_inst->op_size);
   4.577 -            break;
   4.578 -        case 0xc7:
   4.579 -            /* mov imm16/32 to m16/32 */
   4.580 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.581 -            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, IMMEDIATE);
   4.582 -            thread_inst->immediate = get_immediate(vm86, (inst+1), thread_inst->op_size);
   4.583 +	} else
   4.584 +	    return DECODE_failure;
   4.585              
   4.586 -            break;
   4.587 -        case 0x0f:
   4.588 -            break;
   4.589 -        default:
   4.590 -            printk("%x, This opcode hasn't been handled yet!", *inst);
   4.591 -            return DECODE_failure;
   4.592 -    }
   4.593 -    
   4.594 -    strcpy((char *)thread_inst->i_name, "mov");
   4.595 -    if (*inst != 0x0f) {
   4.596 -        return DECODE_success;
   4.597 +    case 0xC7:
   4.598 +	if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */
   4.599 +	    instr->instr = INSTR_MOV;
   4.600 +	    GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.601 +
   4.602 +	    instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
   4.603 +	    instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
   4.604 +	    instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
   4.605 +            
   4.606 +            return DECODE_success;
   4.607 +	} else
   4.608 +	    return DECODE_failure;
   4.609 +
   4.610 +    case 0xF6:
   4.611 +	if (((opcode[1] >> 3) & 7) == 0) { /* testb $imm8, m8 */
   4.612 +	    instr->instr = INSTR_TEST;
   4.613 +	    instr->op_size = BYTE;
   4.614 +
   4.615 +	    instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
   4.616 +	    instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
   4.617 +	    instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
   4.618 +
   4.619 +	    return DECODE_success;
   4.620 +	} else
   4.621 +	    return DECODE_failure;
   4.622 +
   4.623 +    case 0x0F:
   4.624 +	break;
   4.625 +
   4.626 +    default:
   4.627 +	printf("%x, This opcode isn't handled yet!\n", *opcode);
   4.628 +        return DECODE_failure;
   4.629      }
   4.630  
   4.631 -    inst++;
   4.632 -    switch (*inst) {
   4.633 -                    
   4.634 -        /* movz */
   4.635 -        case 0xb6:
   4.636 -            index = get_index((inst + 1), rex);
   4.637 -            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
   4.638 -            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER);
   4.639 -            thread_inst->op_size = BYTE;
   4.640 -            strcpy((char *)thread_inst->i_name, "movzb");
   4.641 -            
   4.642 -            return DECODE_success;
   4.643 -        case 0xb7:
   4.644 -	    index = get_index((inst + 1), rex);
   4.645 -	    if (rex & 0x8) {
   4.646 -		    thread_inst->op_size = LONG;
   4.647 -		    thread_inst->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
   4.648 -	    } else {
   4.649 -		    thread_inst->op_size = WORD;
   4.650 -		    thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
   4.651 -	    }
   4.652 -            
   4.653 -            strcpy((char *)thread_inst->i_name, "movzw");
   4.654 -            
   4.655 -            return DECODE_success;
   4.656 -        default:
   4.657 -            printk("0f %x, This opcode hasn't been handled yet!", *inst);
   4.658 -            return DECODE_failure;
   4.659 +    switch (*++opcode) {
   4.660 +    case 0xB6: /* movz m8, r16/r32 */
   4.661 +	instr->instr = INSTR_MOVZ;
   4.662 +	GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   4.663 +	index = get_index(opcode + 1, rex);
   4.664 +	instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
   4.665 +	instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
   4.666 +	return DECODE_success;
   4.667 +
   4.668 +    case 0xB7: /* movz m16, r32 */
   4.669 +	instr->instr = INSTR_MOVZ;
   4.670 +	index = get_index(opcode + 1, rex);
   4.671 +	if (rex & 0x8) {
   4.672 +	   instr->op_size = LONG;
   4.673 +	   instr->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
   4.674 +	} else {
   4.675 +	   instr->op_size = WORD;
   4.676 +	   instr->operand[1] = mk_operand(LONG, index, 0, REGISTER);
   4.677 +	}
   4.678 +	instr->operand[0] = mk_operand(instr->op_size, 0, 0, MEMORY);
   4.679 +	return DECODE_success;
   4.680 +
   4.681 +    default:
   4.682 +	printf("0f %x, This opcode isn't handled yet\n", *opcode);
   4.683 +	return DECODE_failure;
   4.684      }
   4.685 -
   4.686 -    /* will never reach here */
   4.687 -    return DECODE_failure;
   4.688  }
   4.689  
   4.690 +/* XXX use vmx_copy instead */
   4.691  int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
   4.692  {
   4.693      unsigned long gpa;
   4.694 @@ -552,40 +628,27 @@ int inst_copy_from_guest(unsigned char *
   4.695      return inst_len+remaining;
   4.696  }
   4.697  
   4.698 -static int read_from_mmio(struct instruction *inst_p)
   4.699 -{
   4.700 -    // Only for mov instruction now!!!
   4.701 -    if (inst_p->operand[1] & REGISTER)
   4.702 -        return 1;
   4.703 -
   4.704 -    return 0;
   4.705 -}
   4.706 -
   4.707 -// dir:  1 read from mmio
   4.708 -//       0 write to mmio
   4.709 -static void send_mmio_req(unsigned long gpa, 
   4.710 -                   struct instruction *inst_p, long value, int dir, int pvalid)
   4.711 +void send_mmio_req(unsigned char type, unsigned long gpa, 
   4.712 +	   unsigned long count, int size, long value, int dir, int pvalid)
   4.713  {
   4.714      struct vcpu *d = current;
   4.715      vcpu_iodata_t *vio;
   4.716      ioreq_t *p;
   4.717      int vm86;
   4.718 -    struct mi_per_cpu_info *mpci_p;
   4.719 -    struct cpu_user_regs *inst_decoder_regs;
   4.720 +    struct cpu_user_regs *regs;
   4.721      extern long evtchn_send(int lport);
   4.722  
   4.723 -    mpci_p = &current->domain->arch.vmx_platform.mpci;
   4.724 -    inst_decoder_regs = mpci_p->inst_decoder_regs;
   4.725 +    regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs;
   4.726  
   4.727      vio = get_vio(d->domain, d->vcpu_id);
   4.728 -
   4.729      if (vio == NULL) {
   4.730 -        printk("bad shared page\n");
   4.731 +        printf("bad shared page\n");
   4.732          domain_crash_synchronous(); 
   4.733      }
   4.734 +
   4.735      p = &vio->vp_ioreq;
   4.736  
   4.737 -    vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
   4.738 +    vm86 = regs->eflags & X86_EFLAGS_VM;
   4.739  
   4.740      if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
   4.741          printf("VMX I/O has not yet completed\n");
   4.742 @@ -596,25 +659,22 @@ static void send_mmio_req(unsigned long 
   4.743      p->dir = dir;
   4.744      p->pdata_valid = pvalid;
   4.745  
   4.746 -    p->port_mm = 1;
   4.747 -    p->size = inst_p->op_size;
   4.748 +    p->type = type;
   4.749 +    p->size = size;
   4.750      p->addr = gpa;
   4.751 -    p->u.data = value;
   4.752 +    p->count = count;
   4.753 +    p->df = regs->eflags & EF_DF ? 1 : 0;
   4.754 +
   4.755 +    if (pvalid) {
   4.756 +	if (vmx_paging_enabled(current))
   4.757 +	    p->u.pdata = (void *) gva_to_gpa(value);
   4.758 +        else
   4.759 +	    p->u.pdata = (void *) value; /* guest VA == guest PA */
   4.760 +    } else
   4.761 +	p->u.data = value;
   4.762  
   4.763      p->state = STATE_IOREQ_READY;
   4.764  
   4.765 -    if (inst_p->flags & REPZ) {
   4.766 -        if (vm86)
   4.767 -            p->count = inst_decoder_regs->ecx & 0xFFFF;
   4.768 -        else
   4.769 -            p->count = inst_decoder_regs->ecx;
   4.770 -        p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
   4.771 -    } else
   4.772 -        p->count = 1;
   4.773 -
   4.774 -    if ((pvalid) && vmx_paging_enabled(current))
   4.775 -        p->u.pdata = (void *) gva_to_gpa(p->u.data);
   4.776 -
   4.777      if (vmx_mmio_intercept(p)){
   4.778          p->state = STATE_IORESP_READY;
   4.779          vmx_io_assist(d);
   4.780 @@ -625,18 +685,50 @@ static void send_mmio_req(unsigned long 
   4.781      vmx_wait_io();
   4.782  }
   4.783  
   4.784 +static void mmio_operands(int type, unsigned long gpa, struct instruction *inst,
   4.785 +		struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs)
   4.786 +{
   4.787 +    unsigned long value = 0;
   4.788 +    int index, size;
   4.789 +    
   4.790 +    size = operand_size(inst->operand[0]);
   4.791 +
   4.792 +    mpcip->flags = inst->flags;
   4.793 +    mpcip->instr = inst->instr;
   4.794 +    mpcip->operand[0] = inst->operand[0]; /* source */
   4.795 +    mpcip->operand[1] = inst->operand[1]; /* destination */
   4.796 +
   4.797 +    if (inst->operand[0] & REGISTER) { /* dest is memory */
   4.798 +	index = operand_index(inst->operand[0]);
   4.799 +	value = get_reg_value(size, index, 0, regs);
   4.800 +	send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
   4.801 +    } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
   4.802 +	value = inst->immediate;
   4.803 +	send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
   4.804 +    } else if (inst->operand[0] & MEMORY) { /* dest is register */
   4.805 +	/* send the request and wait for the value */
   4.806 +	send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0);
   4.807 +    } else {
   4.808 +	printf("mmio_operands: invalid operand\n");
   4.809 +	domain_crash_synchronous();
   4.810 +    }
   4.811 +}
   4.812 +
   4.813 +#define GET_REPEAT_COUNT() \
   4.814 +     (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1)
   4.815 +	
   4.816  void handle_mmio(unsigned long va, unsigned long gpa)
   4.817  {
   4.818      unsigned long eip, eflags, cs;
   4.819      unsigned long inst_len, inst_addr;
   4.820 -    struct mi_per_cpu_info *mpci_p;
   4.821 -    struct cpu_user_regs *inst_decoder_regs;
   4.822 +    struct mi_per_cpu_info *mpcip;
   4.823 +    struct cpu_user_regs *regs;
   4.824      struct instruction mmio_inst;
   4.825      unsigned char inst[MAX_INST_LEN];
   4.826 -    int vm86, ret;
   4.827 +    int i, vm86, ret;
   4.828       
   4.829 -    mpci_p = &current->domain->arch.vmx_platform.mpci;
   4.830 -    inst_decoder_regs = mpci_p->inst_decoder_regs;
   4.831 +    mpcip = &current->domain->arch.vmx_platform.mpci;
   4.832 +    regs = mpcip->inst_decoder_regs;
   4.833  
   4.834      __vmread(GUEST_RIP, &eip);
   4.835      __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
   4.836 @@ -647,108 +739,142 @@ void handle_mmio(unsigned long va, unsig
   4.837          __vmread(GUEST_CS_SELECTOR, &cs);
   4.838          inst_addr = (cs << 4) + eip;
   4.839      } else
   4.840 -        inst_addr = eip; /* XXX should really look at GDT[cs].base too */
   4.841 +        inst_addr = eip;
   4.842  
   4.843 -    memset(inst, '0', MAX_INST_LEN);
   4.844 +    memset(inst, 0, MAX_INST_LEN);
   4.845      ret = inst_copy_from_guest(inst, inst_addr, inst_len);
   4.846      if (ret != inst_len) {
   4.847 -        printk("handle_mmio - EXIT: get guest instruction fault\n");
   4.848 +        printf("handle_mmio - EXIT: get guest instruction fault\n");
   4.849          domain_crash_synchronous();
   4.850      }
   4.851  
   4.852 -
   4.853      init_instruction(&mmio_inst);
   4.854      
   4.855      if (vmx_decode(inst, &mmio_inst) == DECODE_failure) {
   4.856 -        printk("vmx decode failure: eip=%lx, va=%lx\n %x %x %x %x\n", eip, va, 
   4.857 -               inst[0], inst[1], inst[2], inst[3]);
   4.858 +	printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
   4.859 +		va, gpa, inst_len);
   4.860 +	for (i = 0; i < inst_len; i++)
   4.861 +	    printf(" %02x", inst[i] & 0xFF);
   4.862 +	printf("\n");
   4.863          domain_crash_synchronous();
   4.864      }
   4.865  
   4.866 -    __vmwrite(GUEST_RIP, eip + inst_len);
   4.867 -    store_cpu_user_regs(inst_decoder_regs);
   4.868 +    store_cpu_user_regs(regs);
   4.869 +    regs->eip += inst_len; /* advance %eip */
   4.870  
   4.871 -    // Only handle "mov" and "movs" instructions!
   4.872 -    if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) {
   4.873 -        if (read_from_mmio(&mmio_inst)) {
   4.874 -            // Send the request and waiting for return value.
   4.875 -            mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
   4.876 -            send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
   4.877 -            return ;
   4.878 -        } else {
   4.879 -            printk("handle_mmio - EXIT: movz error!\n");
   4.880 -            domain_crash_synchronous();
   4.881 -        }
   4.882 -    }
   4.883 +    switch (mmio_inst.instr) {
   4.884 +    case INSTR_MOV:
   4.885 +	mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
   4.886 +	break;
   4.887  
   4.888 -    if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
   4.889 +    case INSTR_MOVS:
   4.890 +    {
   4.891 +	unsigned long count = GET_REPEAT_COUNT();
   4.892 +	unsigned long size = mmio_inst.op_size;
   4.893 +	int sign = regs->eflags & EF_DF ? -1 : 1;
   4.894  	unsigned long addr = 0;
   4.895  	int dir;
   4.896  
   4.897 +	/* determine non-MMIO address */
   4.898  	if (vm86) {
   4.899  	    unsigned long seg;
   4.900  
   4.901  	    __vmread(GUEST_ES_SELECTOR, &seg);
   4.902 -	    if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) {
   4.903 +	    if (((seg << 4) + (regs->edi & 0xFFFF)) == va) {
   4.904  		dir = IOREQ_WRITE;
   4.905  		__vmread(GUEST_DS_SELECTOR, &seg);
   4.906 -		addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF);
   4.907 +		addr = (seg << 4) + (regs->esi & 0xFFFF);
   4.908  	    } else {
   4.909  		dir = IOREQ_READ;
   4.910 -		addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF);
   4.911 +		addr = (seg << 4) + (regs->edi & 0xFFFF);
   4.912  	    }
   4.913 -	} else { /* XXX should really look at GDT[ds/es].base too */
   4.914 -	    if (va == inst_decoder_regs->edi) {
   4.915 +	} else {
   4.916 +	    if (va == regs->edi) {
   4.917  		dir = IOREQ_WRITE;
   4.918 -		addr = inst_decoder_regs->esi;
   4.919 +		addr = regs->esi;
   4.920  	    } else {
   4.921  		dir = IOREQ_READ;
   4.922 -		addr = inst_decoder_regs->edi;
   4.923 +		addr = regs->edi;
   4.924  	    }
   4.925  	}
   4.926  
   4.927 -	send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
   4.928 -        return;
   4.929 +	mpcip->flags = mmio_inst.flags;
   4.930 +	mpcip->instr = mmio_inst.instr;
   4.931 +
   4.932 +	/*
   4.933 +	 * In case of a movs spanning multiple pages, we break the accesses
   4.934 +	 * up into multiple pages (the device model works with non-continguous
   4.935 +	 * physical guest pages). To copy just one page, we adjust %ecx and
   4.936 +	 * do not advance %eip so that the next "rep movs" copies the next page.
   4.937 +	 * Unaligned accesses, for example movsl starting at PGSZ-2, are
   4.938 +	 * turned into a single copy where we handle the overlapping memory
   4.939 +	 * copy ourself. After this copy succeeds, "rep movs" is executed
   4.940 +	 * again.
   4.941 +	 */
   4.942 +	if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
   4.943 +	    unsigned long value = 0;
   4.944 +
   4.945 +	    mpcip->flags |= OVERLAP;
   4.946 +
   4.947 +	    regs->eip -= inst_len; /* do not advance %eip */
   4.948 +
   4.949 +	    if (dir == IOREQ_WRITE)
   4.950 +		vmx_copy(&value, addr, size, VMX_COPY_IN);
   4.951 +	    send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0);
   4.952 +	} else {
   4.953 +	    if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
   4.954 +	        regs->eip -= inst_len; /* do not advance %eip */
   4.955 +
   4.956 +		if (sign > 0)
   4.957 +		    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
   4.958 +		else
   4.959 +		    count = (addr & ~PAGE_MASK) / size;
   4.960 +	    }
   4.961 +
   4.962 +	    send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1);
   4.963 +	}
   4.964 +        break;
   4.965      }
   4.966  
   4.967 -    if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) {
   4.968 -        long value = 0;
   4.969 -        int size, index;
   4.970 +    case INSTR_MOVZ:
   4.971 +	mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
   4.972 +	break;
   4.973  
   4.974 -        if (read_from_mmio(&mmio_inst)) {
   4.975 -            // Send the request and waiting for return value.
   4.976 -            mpci_p->mmio_target = mmio_inst.operand[1];
   4.977 -            send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
   4.978 -            return;
   4.979 -        } else {
   4.980 -            // Write to MMIO
   4.981 -            if (mmio_inst.operand[0] & IMMEDIATE) {
   4.982 -                value = mmio_inst.immediate;
   4.983 -            } else if (mmio_inst.operand[0] & REGISTER) {
   4.984 -                size = operand_size(mmio_inst.operand[0]);
   4.985 -                index = operand_index(mmio_inst.operand[0]);
   4.986 -                value = get_reg_value(size, index, 0, inst_decoder_regs);
   4.987 -            } else {
   4.988 -                domain_crash_synchronous();
   4.989 -            }
   4.990 -            send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
   4.991 -            return;
   4.992 -        }
   4.993 +    case INSTR_STOS:
   4.994 +	/*
   4.995 +	 * Since the destination is always in (contiguous) mmio space we don't
   4.996 +	 * need to break it up into pages.
   4.997 +	 */
   4.998 +	mpcip->flags = mmio_inst.flags;
   4.999 +	mpcip->instr = mmio_inst.instr;
  4.1000 +        send_mmio_req(IOREQ_TYPE_COPY, gpa,
  4.1001 +	    GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0);
  4.1002 +	break;
  4.1003 +
  4.1004 +    case INSTR_OR:
  4.1005 +	mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs);
  4.1006 +	break;
  4.1007 +
  4.1008 +    case INSTR_AND:
  4.1009 +	mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs);
  4.1010 +	break;
  4.1011 +
  4.1012 +    case INSTR_XOR:
  4.1013 +	mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs);
  4.1014 +	break;
  4.1015 +
  4.1016 +    case INSTR_CMP:
  4.1017 +	mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
  4.1018 +	break;
  4.1019 +
  4.1020 +    case INSTR_TEST:
  4.1021 +	mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
  4.1022 +    	break;
  4.1023 +
  4.1024 +    default:
  4.1025 +	printf("Unhandled MMIO instruction\n");
  4.1026 +	domain_crash_synchronous();
  4.1027      }
  4.1028 -
  4.1029 -    if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
  4.1030 -        send_mmio_req(gpa, &mmio_inst,
  4.1031 -            inst_decoder_regs->eax, IOREQ_WRITE, 0);
  4.1032 -        return;
  4.1033 -    }
  4.1034 -    /* Workaround for cmp instruction */
  4.1035 -    if (!strncmp((char *)mmio_inst.i_name, "cmp", 3)) {
  4.1036 -        inst_decoder_regs->eflags &= ~X86_EFLAGS_ZF;
  4.1037 -        __vmwrite(GUEST_RFLAGS, inst_decoder_regs->eflags);
  4.1038 -        return;
  4.1039 -    }
  4.1040 -
  4.1041 -    domain_crash_synchronous();
  4.1042  }
  4.1043  
  4.1044  #endif /* CONFIG_VMX */
     5.1 --- a/xen/include/asm-x86/vmx_platform.h	Fri Sep 02 17:53:52 2005 +0000
     5.2 +++ b/xen/include/asm-x86/vmx_platform.h	Fri Sep 02 17:54:34 2005 +0000
     5.3 @@ -24,8 +24,7 @@
     5.4  #include <asm/vmx_virpit.h>
     5.5  #include <asm/vmx_intercept.h>
     5.6  
     5.7 -#define MAX_OPERAND_NUM 3
     5.8 -#define I_NAME_LEN  16
     5.9 +#define MAX_OPERAND_NUM 2
    5.10  
    5.11  #define mk_operand(size, index, seg, flag) \
    5.12      (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
    5.13 @@ -35,54 +34,60 @@
    5.14  
    5.15  #define operand_index(operand)  \
    5.16        ((operand >> 16) & 0xFF)
    5.17 -      //For instruction.operand[].size
    5.18 +
    5.19 +/* for instruction.operand[].size */
    5.20  #define BYTE    1
    5.21  #define WORD    2
    5.22  #define LONG    4
    5.23  #define QUAD    8
    5.24  #define BYTE_64 16
    5.25  
    5.26 -      //For instruction.operand[].flag
    5.27 +/* for instruction.operand[].flag */
    5.28  #define REGISTER    0x1
    5.29  #define MEMORY      0x2
    5.30  #define IMMEDIATE   0x4
    5.31 -#define WZEROEXTEND 0x8
    5.32  
    5.33 -      //For instruction.flags
    5.34 +/* for instruction.flags */
    5.35  #define REPZ    0x1
    5.36  #define REPNZ   0x2
    5.37 +#define OVERLAP 0x4
    5.38 +
    5.39 +#define	INSTR_PIO	1
    5.40 +#define INSTR_OR	2
    5.41 +#define INSTR_AND	3
    5.42 +#define INSTR_XOR	4
    5.43 +#define INSTR_CMP	5
    5.44 +#define INSTR_MOV	6
    5.45 +#define INSTR_MOVS	7
    5.46 +#define INSTR_MOVZ	8
    5.47 +#define INSTR_STOS	9
    5.48 +#define INSTR_TEST	10
    5.49  
    5.50  struct instruction {
    5.51 -    __s8    i_name[I_NAME_LEN];  //Instruction's name
    5.52 -    __s16   op_size;    //The operand's bit size, e.g. 16-bit or 32-bit.
    5.53 -
    5.54 -    __u64   offset;     //The effective address
    5.55 -          //offset = Base + (Index * Scale) + Displacement
    5.56 -
    5.57 +    __s8    instr;	/* instruction type */
    5.58 +    __s16   op_size;    /* the operand's bit size, e.g. 16-bit or 32-bit */
    5.59      __u64   immediate;
    5.60 -
    5.61 -    __u16   seg_sel;    //Segmentation selector
    5.62 -
    5.63 -    __u32   operand[MAX_OPERAND_NUM];   //The order of operand is from AT&T Assembly
    5.64 -    __s16   op_num; //The operand numbers
    5.65 -
    5.66 -    __u32   flags; //
    5.67 +    __u16   seg_sel;    /* segmentation selector */
    5.68 +    __u32   operand[MAX_OPERAND_NUM];   /* order is AT&T assembly */
    5.69 +    __u32   flags;
    5.70  };
    5.71  
    5.72  #define MAX_INST_LEN      32
    5.73  
    5.74 -struct mi_per_cpu_info
    5.75 -{
    5.76 -    unsigned long          mmio_target;
    5.77 -    struct cpu_user_regs        *inst_decoder_regs;
    5.78 +struct mi_per_cpu_info {
    5.79 +    int                    flags;
    5.80 +    int			   instr;		/* instruction */
    5.81 +    unsigned long          operand[2];		/* operands */
    5.82 +    unsigned long          immediate;		/* immediate portion */
    5.83 +    struct cpu_user_regs   *inst_decoder_regs;	/* current context */
    5.84  };
    5.85  
    5.86  struct virtual_platform_def {
    5.87 -    unsigned long          *real_mode_data; /* E820, etc. */
    5.88 +    unsigned long          *real_mode_data;	/* E820, etc. */
    5.89      unsigned long          shared_page_va;
    5.90      struct vmx_virpit_t    vmx_pit;
    5.91      struct vmx_handler_t   vmx_handler;
    5.92 -    struct mi_per_cpu_info mpci;            /* MMIO */
    5.93 +    struct mi_per_cpu_info mpci;		/* MMIO */
    5.94  };
    5.95  
    5.96  extern void handle_mmio(unsigned long, unsigned long);
     6.1 --- a/xen/include/public/io/ioreq.h	Fri Sep 02 17:53:52 2005 +0000
     6.2 +++ b/xen/include/public/io/ioreq.h	Fri Sep 02 17:54:34 2005 +0000
     6.3 @@ -29,9 +29,17 @@
     6.4  #define STATE_IORESP_READY      3
     6.5  #define STATE_IORESP_HOOK       4
     6.6  
     6.7 -/* VMExit dispatcher should cooperate with instruction decoder to
     6.8 -   prepare this structure and notify service OS and DM by sending
     6.9 -   virq */
    6.10 +#define IOREQ_TYPE_PIO		0	/* pio */
    6.11 +#define IOREQ_TYPE_COPY		1	/* mmio ops */
    6.12 +#define IOREQ_TYPE_AND		2
    6.13 +#define IOREQ_TYPE_OR		3
    6.14 +#define IOREQ_TYPE_XOR		4
    6.15 +
    6.16 +/*
    6.17 + * VMExit dispatcher should cooperate with instruction decoder to
    6.18 + * prepare this structure and notify service OS and DM by sending
    6.19 + * virq 
    6.20 + */
    6.21  typedef struct {
    6.22      u64     addr;               /*  physical address            */
    6.23      u64     size;               /*  size in bytes               */
    6.24 @@ -43,8 +51,8 @@ typedef struct {
    6.25      u8      state:4;
    6.26      u8      pdata_valid:1;	/* if 1, use pdata above        */
    6.27      u8      dir:1;		/*  1=read, 0=write             */
    6.28 -    u8      port_mm:1;		/*  0=portio, 1=mmio            */
    6.29      u8      df:1;
    6.30 +    u8      type;		/* I/O type			*/
    6.31  } ioreq_t;
    6.32  
    6.33  #define MAX_VECTOR    256