ia64/xen-unstable

changeset 11635:f34e37d0742d

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Tue Sep 26 19:11:33 2006 -0600 (2006-09-26)
parents 9da2d9b48ff8 bd811e94d293
children 3470d9cd27e5
files xen/arch/ia64/xen/xensetup.c xen/arch/x86/hvm/svm/instrlen.c
line diff
     1.1 --- a/docs/man/xm.pod.1	Tue Sep 26 16:15:45 2006 -0600
     1.2 +++ b/docs/man/xm.pod.1	Tue Sep 26 19:11:33 2006 -0600
     1.3 @@ -393,7 +393,9 @@ specified, VCPU information for all doma
     1.4  
     1.5  =item B<vcpu-pin> I<domain-id> I<vcpu> I<cpus>
     1.6  
     1.7 -Pins the the VCPU to only run on the specific CPUs.  
     1.8 +Pins the VCPU to only run on the specific CPUs.  The keyword
     1.9 +I<all> can be used to apply the I<cpus> list to all VCPUs in the
    1.10 +domain.
    1.11  
    1.12  Normally VCPUs can float between available CPUs whenever Xen deems a
    1.13  different run state is appropriate.  Pinning can be used to restrict
     2.1 --- a/docs/src/user.tex	Tue Sep 26 16:15:45 2006 -0600
     2.2 +++ b/docs/src/user.tex	Tue Sep 26 19:11:33 2006 -0600
     2.3 @@ -3208,6 +3208,8 @@ editing \path{grub.conf}.
     2.4    respectively; if no suffix is specified, the parameter defaults to
     2.5    kilobytes. In previous versions of Xen, suffixes were not supported
     2.6    and the value is always interpreted as kilobytes.
     2.7 +\item [ dom0\_vcpus\_pin ] Pins domain 0 VCPUs on their respective
     2.8 +  physical CPUs (default=false).
     2.9  \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
    2.10    pages (default 0).  
    2.11  \item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
     3.1 --- a/tools/firmware/vmxassist/vm86.c	Tue Sep 26 16:15:45 2006 -0600
     3.2 +++ b/tools/firmware/vmxassist/vm86.c	Tue Sep 26 19:11:33 2006 -0600
     3.3 @@ -69,28 +69,23 @@ guest_linear_to_real(uint32_t base)
     3.4  
     3.5  	if (!(oldctx.cr4 & CR4_PAE)) {
     3.6  		l1_mfn = ((uint32_t *)gcr3)[(base >> 22) & 0x3ff];
     3.7 +		if (!(l1_mfn & PT_ENTRY_PRESENT))
     3.8 +			panic("l2 entry not present\n");
     3.9  
    3.10 -		if (oldctx.cr4 & CR4_PSE || l1_mfn & PDE_PS) {
    3.11 -                        /* 1 level page table */
    3.12 -			l0_mfn = l1_mfn;
    3.13 -			if (!(l0_mfn & PT_ENTRY_PRESENT))
    3.14 -				panic("l1 entry not present\n");
    3.15 -
    3.16 -			l0_mfn &= 0xffc00000;
    3.17 +		if ((oldctx.cr4 & CR4_PSE) && (l1_mfn & PDE_PS)) {
    3.18 +			l0_mfn = l1_mfn & 0xffc00000;
    3.19  			return l0_mfn + (base & 0x3fffff);
    3.20  		}
    3.21  
    3.22 -		if (!(l1_mfn & PT_ENTRY_PRESENT))
    3.23 -			panic("l2 entry not present\n");
    3.24 +		l1_mfn &= 0xfffff000;
    3.25  
    3.26 -		l1_mfn &= 0xfffff000;
    3.27  		l0_mfn = ((uint32_t *)l1_mfn)[(base >> 12) & 0x3ff];
    3.28  		if (!(l0_mfn & PT_ENTRY_PRESENT))
    3.29  			panic("l1 entry not present\n");
    3.30  		l0_mfn &= 0xfffff000;
    3.31  
    3.32  		return l0_mfn + (base & 0xfff);
    3.33 -	} else if (oldctx.cr4 & CR4_PAE && !(oldctx.cr4 & CR4_PSE)) {
    3.34 +	} else {
    3.35  		l2_mfn = ((uint64_t *)gcr3)[(base >> 30) & 0x3];
    3.36  		if (!(l2_mfn & PT_ENTRY_PRESENT))
    3.37  			panic("l3 entry not present\n");
    3.38 @@ -99,6 +94,12 @@ guest_linear_to_real(uint32_t base)
    3.39  		l1_mfn = ((uint64_t *)l2_mfn)[(base >> 21) & 0x1ff];
    3.40  		if (!(l1_mfn & PT_ENTRY_PRESENT))
    3.41  			panic("l2 entry not present\n");
    3.42 +
    3.43 +		if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */
    3.44 +			l0_mfn = l1_mfn & 0x3ffe00000ULL;
    3.45 +			return l0_mfn + (base & 0x1fffff);
    3.46 +		}
    3.47 +
    3.48  		l1_mfn &= 0x3fffff000ULL;
    3.49  
    3.50  		l0_mfn = ((uint64_t *)l1_mfn)[(base >> 12) & 0x1ff];
    3.51 @@ -107,18 +108,6 @@ guest_linear_to_real(uint32_t base)
    3.52  		l0_mfn &= 0x3fffff000ULL;
    3.53  
    3.54  		return l0_mfn + (base & 0xfff);
    3.55 -	} else { /* oldctx.cr4 & CR4_PAE && oldctx.cr4 & CR4_PSE */
    3.56 -		l1_mfn = ((uint64_t *)gcr3)[(base >> 30) & 0x3];
    3.57 -		if (!(l1_mfn & PT_ENTRY_PRESENT))
    3.58 -			panic("l2 entry not present\n");
    3.59 -		l1_mfn &= 0x3fffff000ULL;
    3.60 -
    3.61 -		l0_mfn = ((uint64_t *)l1_mfn)[(base >> 21) & 0x1ff];
    3.62 -		if (!(l0_mfn & PT_ENTRY_PRESENT))
    3.63 -			panic("l1 entry not present\n");
    3.64 -		l0_mfn &= 0x3ffe00000ULL;
    3.65 -
    3.66 -		return l0_mfn + (base & 0x1fffff);
    3.67  	}
    3.68  }
    3.69  
     4.1 --- a/tools/ioemu/hw/serial.c	Tue Sep 26 16:15:45 2006 -0600
     4.2 +++ b/tools/ioemu/hw/serial.c	Tue Sep 26 19:11:33 2006 -0600
     4.3 @@ -22,6 +22,9 @@
     4.4   * THE SOFTWARE.
     4.5   */
     4.6  #include "vl.h"
     4.7 +#include <sys/time.h>
     4.8 +#include <time.h>
     4.9 +#include <assert.h>
    4.10  
    4.11  //#define DEBUG_SERIAL
    4.12  
    4.13 @@ -140,6 +143,67 @@ static void serial_update_parameters(Ser
    4.14  #endif
    4.15  }
    4.16  
    4.17 +/* Rate limit serial requests so that e.g. grub on a serial console
    4.18 +   doesn't kill dom0.  Simple token bucket.  If we get some actual
     4.19 +   data from the user, instantly refill the bucket. */
    4.20 +
    4.21 +/* How long it takes to generate a token, in microseconds. */
    4.22 +#define TOKEN_PERIOD 1000
    4.23 +/* Maximum and initial size of token bucket */
    4.24 +#define TOKENS_MAX 100000
    4.25 +
    4.26 +static int tokens_avail;
    4.27 +
    4.28 +static void serial_get_token(void)
    4.29 +{
    4.30 +    static struct timeval last_refil_time;
    4.31 +    static int started;
    4.32 +
    4.33 +    assert(tokens_avail >= 0);
    4.34 +    if (!tokens_avail) {
    4.35 +	struct timeval delta, now;
    4.36 +	int generated;
    4.37 +
    4.38 +	if (!started) {
    4.39 +	    gettimeofday(&last_refil_time, NULL);
    4.40 +	    tokens_avail = TOKENS_MAX;
    4.41 +	    started = 1;
    4.42 +	    return;
    4.43 +	}
    4.44 +    retry:
    4.45 +	gettimeofday(&now, NULL);
    4.46 +	delta.tv_sec = now.tv_sec - last_refil_time.tv_sec;
    4.47 +	delta.tv_usec = now.tv_usec - last_refil_time.tv_usec;
    4.48 +	if (delta.tv_usec < 0) {
    4.49 +	    delta.tv_usec += 1000000;
    4.50 +	    delta.tv_sec--;
    4.51 +	}
    4.52 +	assert(delta.tv_usec >= 0 && delta.tv_sec >= 0);
    4.53 +	if (delta.tv_usec < TOKEN_PERIOD) {
    4.54 +	    struct timespec ts;
    4.55 +	    /* Wait until at least one token is available. */
    4.56 +	    ts.tv_sec = TOKEN_PERIOD / 1000000;
    4.57 +	    ts.tv_nsec = (TOKEN_PERIOD % 1000000) * 1000;
    4.58 +	    while (nanosleep(&ts, &ts) < 0 && errno == EINTR)
    4.59 +		;
    4.60 +	    goto retry;
    4.61 +	}
    4.62 +	generated = (delta.tv_sec * 1000000) / TOKEN_PERIOD;
    4.63 +	generated +=
    4.64 +	    ((delta.tv_sec * 1000000) % TOKEN_PERIOD + delta.tv_usec) / TOKEN_PERIOD;
    4.65 +	assert(generated > 0);
    4.66 +
    4.67 +	last_refil_time.tv_usec += (generated * TOKEN_PERIOD) % 1000000;
    4.68 +	last_refil_time.tv_sec  += last_refil_time.tv_usec / 1000000;
    4.69 +	last_refil_time.tv_usec %= 1000000;
    4.70 +	last_refil_time.tv_sec  += (generated * TOKEN_PERIOD) / 1000000;
    4.71 +	if (generated > TOKENS_MAX)
    4.72 +	    generated = TOKENS_MAX;
    4.73 +	tokens_avail = generated;
    4.74 +    }
    4.75 +    tokens_avail--;
    4.76 +}
    4.77 +
    4.78  static void serial_ioport_write(void *opaque, uint32_t addr, uint32_t val)
    4.79  {
    4.80      SerialState *s = opaque;
    4.81 @@ -245,9 +309,11 @@ static uint32_t serial_ioport_read(void 
    4.82          ret = s->mcr;
    4.83          break;
    4.84      case 5:
    4.85 +	serial_get_token();
    4.86          ret = s->lsr;
    4.87          break;
    4.88      case 6:
    4.89 +	serial_get_token();
    4.90          if (s->mcr & UART_MCR_LOOP) {
    4.91              /* in loopback, the modem output pins are connected to the
    4.92                 inputs */
    4.93 @@ -296,12 +362,14 @@ static int serial_can_receive1(void *opa
    4.94  static void serial_receive1(void *opaque, const uint8_t *buf, int size)
    4.95  {
    4.96      SerialState *s = opaque;
    4.97 +    tokens_avail = TOKENS_MAX;
    4.98      serial_receive_byte(s, buf[0]);
    4.99  }
   4.100  
   4.101  static void serial_event(void *opaque, int event)
   4.102  {
   4.103      SerialState *s = opaque;
   4.104 +    tokens_avail = TOKENS_MAX;
   4.105      if (event == CHR_EVENT_BREAK)
   4.106          serial_receive_break(s);
   4.107  }
     5.1 --- a/tools/ioemu/usb-linux.c	Tue Sep 26 16:15:45 2006 -0600
     5.2 +++ b/tools/ioemu/usb-linux.c	Tue Sep 26 19:11:33 2006 -0600
     5.3 @@ -26,6 +26,9 @@
     5.4  #if defined(__linux__)
     5.5  #include <dirent.h>
     5.6  #include <sys/ioctl.h>
     5.7 +/* Some versions of usbdevice_fs.h need __user to be defined for them.   */
     5.8 +/* This may (harmlessly) conflict with a definition in linux/compiler.h. */
     5.9 +#define __user
    5.10  #include <linux/usbdevice_fs.h>
    5.11  #include <linux/version.h>
    5.12  
     6.1 --- a/tools/ioemu/vl.c	Tue Sep 26 16:15:45 2006 -0600
     6.2 +++ b/tools/ioemu/vl.c	Tue Sep 26 19:11:33 2006 -0600
     6.3 @@ -727,7 +727,7 @@ void qemu_del_timer(QEMUTimer *ts)
     6.4  
     6.5  void qemu_advance_timer(QEMUTimer *ts, int64_t expire_time)
     6.6  {
     6.7 -    if (ts->expire_time > expire_time)
     6.8 +    if (ts->expire_time > expire_time || !qemu_timer_pending(ts))
     6.9  	qemu_mod_timer(ts, expire_time);
    6.10  }
    6.11  
     7.1 --- a/tools/ioemu/vnc.c	Tue Sep 26 16:15:45 2006 -0600
     7.2 +++ b/tools/ioemu/vnc.c	Tue Sep 26 19:11:33 2006 -0600
     7.3 @@ -26,6 +26,7 @@
     7.4  
     7.5  #include "vl.h"
     7.6  #include "qemu_socket.h"
     7.7 +#include <assert.h>
     7.8  
     7.9  /* The refresh interval starts at BASE.  If we scan the buffer and
    7.10     find no change, we increase by INC, up to MAX.  If the mouse moves
    7.11 @@ -580,12 +581,16 @@ static void _vnc_update_client(void *opa
    7.12  	       interested (e.g. minimised) it'll ignore this, and we
    7.13  	       can stop scanning the buffer until it sends another
    7.14  	       update request. */
    7.15 -	    /* Note that there are bugs in xvncviewer which prevent
    7.16 -	       this from actually working.  Leave the code in place
    7.17 -	       for correct clients. */
    7.18 +	    /* It turns out that there's a bug in realvncviewer 4.1.2
    7.19 +	       which means that if you send a proper null update (with
    7.20 +	       no update rectangles), it gets a bit out of sync and
    7.21 +	       never sends any further requests, regardless of whether
    7.22 +	       it needs one or not.  Fix this by sending a single 1x1
    7.23 +	       update rectangle instead. */
    7.24  	    vnc_write_u8(vs, 0);
    7.25  	    vnc_write_u8(vs, 0);
    7.26 -	    vnc_write_u16(vs, 0);
    7.27 +	    vnc_write_u16(vs, 1);
    7.28 +	    send_framebuffer_update(vs, 0, 0, 1, 1);
    7.29  	    vnc_flush(vs);
    7.30  	    vs->last_update_time = now;
    7.31  	    return;
    7.32 @@ -728,8 +733,10 @@ static void vnc_client_read(void *opaque
    7.33  	    memmove(vs->input.buffer, vs->input.buffer + len,
    7.34  		    vs->input.offset - len);
    7.35  	    vs->input.offset -= len;
    7.36 -	} else
    7.37 +	} else {
    7.38 +	    assert(ret > vs->read_handler_expect);
    7.39  	    vs->read_handler_expect = ret;
    7.40 +	}
    7.41      }
    7.42  }
    7.43  
    7.44 @@ -1076,8 +1083,12 @@ static int protocol_client_msg(VncState 
    7.45  	if (len == 1)
    7.46  	    return 4;
    7.47  
    7.48 -	if (len == 4)
    7.49 -	    return 4 + (read_u16(data, 2) * 4);
    7.50 +	if (len == 4) {
    7.51 +	    uint16_t v;
    7.52 +	    v = read_u16(data, 2);
    7.53 +	    if (v)
    7.54 +		return 4 + v * 4;
    7.55 +	}
    7.56  
    7.57  	limit = read_u16(data, 2);
    7.58  	for (i = 0; i < limit; i++) {
    7.59 @@ -1117,8 +1128,12 @@ static int protocol_client_msg(VncState 
    7.60  	if (len == 1)
    7.61  	    return 8;
    7.62  
    7.63 -	if (len == 8)
    7.64 -	    return 8 + read_u32(data, 4);
    7.65 +	if (len == 8) {
    7.66 +	    uint32_t v;
    7.67 +	    v = read_u32(data, 4);
    7.68 +	    if (v)
    7.69 +		return 8 + v;
    7.70 +	}
    7.71  
    7.72  	client_cut_text(vs, read_u32(data, 4), data + 8);
    7.73  	break;
     8.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Tue Sep 26 16:15:45 2006 -0600
     8.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Sep 26 19:11:33 2006 -0600
     8.3 @@ -161,8 +161,8 @@ def restore(xd, fd):
     8.4          if handler.store_mfn is None or handler.console_mfn is None:
     8.5              raise XendError('Could not read store/console MFN')
     8.6  
     8.7 -        #Block until src closes connection
     8.8 -        os.read(fd, 1)
     8.9 +        os.read(fd, 1)           # Wait for source to close connection
    8.10 +        dominfo.waitForDevices() # Wait for backends to set up
    8.11          dominfo.unpause()
    8.12          
    8.13          dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
     9.1 --- a/tools/python/xen/xend/XendDomain.py	Tue Sep 26 16:15:45 2006 -0600
     9.2 +++ b/tools/python/xen/xend/XendDomain.py	Tue Sep 26 19:11:33 2006 -0600
     9.3 @@ -487,10 +487,19 @@ class XendDomain:
     9.4          if not dominfo:
     9.5              raise XendInvalidDomain(str(domid))
     9.6  
     9.7 -        try:
     9.8 -            return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
     9.9 -        except Exception, ex:
    9.10 -            raise XendError(str(ex))
    9.11 +        # if vcpu is keyword 'all', apply the cpumap to all vcpus
    9.12 +        vcpus = [ vcpu ]
    9.13 +        if str(vcpu).lower() == "all":
    9.14 +            vcpus = range(0, int(dominfo.getVCpuCount()))
    9.15 +       
    9.16 +        # set the same cpumask for all vcpus
    9.17 +        rc = 0
    9.18 +        for v in vcpus:
    9.19 +            try:
    9.20 +                rc = xc.vcpu_setaffinity(dominfo.getDomid(), int(v), cpumap)
    9.21 +            except Exception, ex:
    9.22 +                raise XendError(str(ex))
    9.23 +        return rc
    9.24  
    9.25      def domain_cpu_sedf_set(self, domid, period, slice_, latency, extratime,
    9.26                              weight):
    10.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Tue Sep 26 16:15:45 2006 -0600
    10.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Tue Sep 26 19:11:33 2006 -0600
    10.3 @@ -97,7 +97,7 @@ class SrvDomain(SrvDir):
    10.4      def op_pincpu(self, _, req):
    10.5          fn = FormFn(self.xd.domain_pincpu,
    10.6                      [['dom', 'int'],
    10.7 -                     ['vcpu', 'int'],
    10.8 +                     ['vcpu', 'str'],
    10.9                       ['cpumap', 'str']])
   10.10          val = fn(req.args, {'dom': self.dom.domid})
   10.11          return val
    11.1 --- a/tools/python/xen/xm/main.py	Tue Sep 26 16:15:45 2006 -0600
    11.2 +++ b/tools/python/xen/xm/main.py	Tue Sep 26 19:11:33 2006 -0600
    11.3 @@ -759,12 +759,16 @@ def xm_vcpu_pin(args):
    11.4                  for i in range(int(x),int(y)+1):
    11.5                      cpus.append(int(i))
    11.6              else:
    11.7 -                cpus.append(int(c))
    11.8 +                # remove this element from the list
    11.9 +                if c[0] == '^':
   11.10 +                    cpus = [x for x in cpus if x != int(c[1:])]
   11.11 +                else:
   11.12 +                    cpus.append(int(c))
   11.13          cpus.sort()
   11.14          return cpus
   11.15  
   11.16      dom  = args[0]
   11.17 -    vcpu = int(args[1])
   11.18 +    vcpu = args[1]
   11.19      cpumap = cpu_make_map(args[2])
   11.20      
   11.21      server.xend.domain.pincpu(dom, vcpu, cpumap)
    12.1 --- a/xen/arch/ia64/xen/domain.c	Tue Sep 26 16:15:45 2006 -0600
    12.2 +++ b/xen/arch/ia64/xen/domain.c	Tue Sep 26 19:11:33 2006 -0600
    12.3 @@ -54,7 +54,6 @@ unsigned long dom0_align = 64*1024*1024;
    12.4  static unsigned int dom0_max_vcpus = 1;
    12.5  integer_param("dom0_max_vcpus", dom0_max_vcpus); 
    12.6  
    12.7 -extern int opt_dom0_vcpus_pin;
    12.8  extern unsigned long running_on_sim;
    12.9  
   12.10  extern char dom0_command_line[];
   12.11 @@ -1021,12 +1020,9 @@ int construct_dom0(struct domain *d,
   12.12  	    dom0_max_vcpus = MAX_VIRT_CPUS;
   12.13  	
   12.14  	printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus);
   12.15 -	for ( i = 1; i < dom0_max_vcpus; i++ ) {
   12.16 +	for ( i = 1; i < dom0_max_vcpus; i++ )
   12.17  	    if (alloc_vcpu(d, i, i) == NULL)
   12.18  		printf ("Cannot allocate dom0 vcpu %d\n", i);
   12.19 -	    else if (opt_dom0_vcpus_pin)
   12.20 -		d->vcpu[i]->cpu_affinity = cpumask_of_cpu(i);
   12.21 -	}
   12.22  
   12.23  	/* Copy the OS image. */
   12.24  	loaddomainelfimage(d,image_start);
    13.1 --- a/xen/arch/ia64/xen/xensetup.c	Tue Sep 26 16:15:45 2006 -0600
    13.2 +++ b/xen/arch/ia64/xen/xensetup.c	Tue Sep 26 19:11:33 2006 -0600
    13.3 @@ -49,10 +49,6 @@ extern void mem_init(void);
    13.4  extern void init_IRQ(void);
    13.5  extern void trap_init(void);
    13.6  
    13.7 -/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
    13.8 -unsigned int opt_dom0_vcpus_pin = 0;
    13.9 -boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
   13.10 -
   13.11  /* opt_nosmp: If true, secondary processors are ignored. */
   13.12  static int opt_nosmp = 0;
   13.13  boolean_param("nosmp", opt_nosmp);
   13.14 @@ -521,10 +517,6 @@ printk("num_online_cpus=%d, max_cpus=%d\
   13.15    			0) != 0)
   13.16          panic("Could not set up DOM0 guest OS\n");
   13.17  
   13.18 -    /* PIN domain0 VCPU 0 on CPU 0. */
   13.19 -    if (opt_dom0_vcpus_pin)
   13.20 -        dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
   13.21 -
   13.22      if (!running_on_sim)  // slow on ski and pages are pre-initialized to zero
   13.23  	scrub_heap_pages();
   13.24  
    14.1 --- a/xen/arch/x86/Rules.mk	Tue Sep 26 16:15:45 2006 -0600
    14.2 +++ b/xen/arch/x86/Rules.mk	Tue Sep 26 19:11:33 2006 -0600
    14.3 @@ -44,7 +44,7 @@ CFLAGS  += -mno-red-zone -fpic -fno-reor
    14.4  CFLAGS  += -fno-asynchronous-unwind-tables
    14.5  # -fvisibility=hidden reduces -fpic cost, if it's available
    14.6  CFLAGS  += $(shell $(CC) -v --help 2>&1 | grep " -fvisibility=" | \
    14.7 -             grep -q hidden && echo "-fvisibility=hidden")
    14.8 +             grep -q hidden && echo "-DGCC_HAS_VISIBILITY_ATTRIBUTE")
    14.9  LDFLAGS += -m elf_x86_64
   14.10  x86_32 := n
   14.11  x86_64 := y
    15.1 --- a/xen/arch/x86/boot/x86_32.S	Tue Sep 26 16:15:45 2006 -0600
    15.2 +++ b/xen/arch/x86/boot/x86_32.S	Tue Sep 26 19:11:33 2006 -0600
    15.3 @@ -218,28 +218,24 @@ nopaging_gdt_descr:
    15.4          .word   LAST_RESERVED_GDT_BYTE
    15.5          .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
    15.6          
    15.7 -        .org 0x1000
    15.8 -/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
    15.9 -/*     machine->physical mapping table. Ring 0 can access all memory.    */
   15.10 +        .align PAGE_SIZE, 0
   15.11 +/* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
   15.12 +/*     the machine->physical mapping table. Ring 0 can access all memory.    */
   15.13 +#define GUEST_DESC(d)                                                   \
   15.14 +        .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff,                \
   15.15 +              ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
   15.16  ENTRY(gdt_table)
   15.17          .quad 0x0000000000000000     /* unused */
   15.18          .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
   15.19          .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
   15.20 -#ifdef CONFIG_X86_PAE
   15.21 -        .quad 0x00cfba00000067ff
   15.22 -        .quad 0x00cfb200000067ff
   15.23 -        .quad 0x00cffa00000067ff
   15.24 -        .quad 0x00cff200000067ff
   15.25 -#else
   15.26 -        .quad 0x00cfba000000c3ff     /* 0xe019 ring 1 3.95GB code at 0x0 */
   15.27 -        .quad 0x00cfb2000000c3ff     /* 0xe021 ring 1 3.95GB data at 0x0 */
   15.28 -        .quad 0x00cffa000000c3ff     /* 0xe02b ring 3 3.95GB code at 0x0 */
   15.29 -        .quad 0x00cff2000000c3ff     /* 0xe033 ring 3 3.95GB data at 0x0 */
   15.30 -#endif
   15.31 +        GUEST_DESC(0x00c0ba00)       /* 0xe019 ring 1 3.xxGB code at 0x0 */
   15.32 +        GUEST_DESC(0x00c0b200)       /* 0xe021 ring 1 3.xxGB data at 0x0 */
   15.33 +        GUEST_DESC(0x00c0fa00)       /* 0xe02b ring 3 3.xxGB code at 0x0 */
   15.34 +        GUEST_DESC(0x00c0f200)       /* 0xe033 ring 3 3.xxGB data at 0x0 */
   15.35          .quad 0x0000000000000000     /* unused                           */
   15.36          .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
   15.37  
   15.38 -        .org 0x2000
   15.39 +        .align PAGE_SIZE, 0
   15.40  
   15.41  #ifdef CONFIG_X86_PAE
   15.42  ENTRY(idle_pg_table)
    16.1 --- a/xen/arch/x86/hvm/Makefile	Tue Sep 26 16:15:45 2006 -0600
    16.2 +++ b/xen/arch/x86/hvm/Makefile	Tue Sep 26 19:11:33 2006 -0600
    16.3 @@ -4,6 +4,7 @@ subdir-y += vmx
    16.4  obj-y += hvm.o
    16.5  obj-y += i8254.o
    16.6  obj-y += i8259.o
    16.7 +obj-y += instrlen.o
    16.8  obj-y += intercept.o
    16.9  obj-y += io.o
   16.10  obj-y += platform.o
    17.1 --- a/xen/arch/x86/hvm/hvm.c	Tue Sep 26 16:15:45 2006 -0600
    17.2 +++ b/xen/arch/x86/hvm/hvm.c	Tue Sep 26 19:11:33 2006 -0600
    17.3 @@ -337,6 +337,33 @@ int cpu_get_interrupt(struct vcpu *v, in
    17.4      return -1;
    17.5  }
    17.6  
    17.7 +static void hvm_vcpu_down(void)
    17.8 +{
    17.9 +    struct vcpu *v = current;
   17.10 +    struct domain *d = v->domain;
   17.11 +    int online_count = 0;
   17.12 +
   17.13 +    DPRINTK("DOM%d/VCPU%d: going offline.\n", d->domain_id, v->vcpu_id);
   17.14 +
   17.15 +    /* Doesn't halt us immediately, but we'll never return to guest context. */
   17.16 +    set_bit(_VCPUF_down, &v->vcpu_flags);
   17.17 +    vcpu_sleep_nosync(v);
   17.18 +
   17.19 +    /* Any other VCPUs online? ... */
   17.20 +    LOCK_BIGLOCK(d);
   17.21 +    for_each_vcpu ( d, v )
   17.22 +        if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
   17.23 +            online_count++;
   17.24 +    UNLOCK_BIGLOCK(d);
   17.25 +
   17.26 +    /* ... Shut down the domain if not. */
   17.27 +    if ( online_count == 0 )
   17.28 +    {
   17.29 +        DPRINTK("DOM%d: all CPUs offline -- powering off.\n", d->domain_id);
   17.30 +        domain_shutdown(d, SHUTDOWN_poweroff);
   17.31 +    }
   17.32 +}
   17.33 +
   17.34  void hvm_hlt(unsigned long rflags)
   17.35  {
   17.36      struct vcpu *v = current;
   17.37 @@ -344,18 +371,12 @@ void hvm_hlt(unsigned long rflags)
   17.38      s_time_t next_pit = -1, next_wakeup;
   17.39  
   17.40      /*
   17.41 -     * Detect machine shutdown.  Only do this for vcpu 0, to avoid potentially 
   17.42 -     * shutting down the domain early. If we halt with interrupts disabled, 
   17.43 -     * that's a pretty sure sign that we want to shut down.  In a real 
   17.44 -     * processor, NMIs are the only way to break out of this.
   17.45 +     * If we halt with interrupts disabled, that's a pretty sure sign that we
   17.46 +     * want to shut down. In a real processor, NMIs are the only way to break
   17.47 +     * out of this.
   17.48       */
   17.49 -    if ( (v->vcpu_id == 0) && !(rflags & X86_EFLAGS_IF) )
   17.50 -    {
   17.51 -        printk("D%d: HLT with interrupts disabled -- shutting down.\n",
   17.52 -               current->domain->domain_id);
   17.53 -        domain_shutdown(current->domain, SHUTDOWN_poweroff);
   17.54 -        return;
   17.55 -    }
   17.56 +    if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
   17.57 +        return hvm_vcpu_down();
   17.58  
   17.59      if ( !v->vcpu_id )
   17.60          next_pit = get_scheduled(v, pt->irq, pt);
   17.61 @@ -578,17 +599,20 @@ int hvm_bringup_ap(int vcpuid, int tramp
   17.62      struct vcpu_guest_context *ctxt;
   17.63      int rc = 0;
   17.64  
   17.65 -    /* current must be HVM domain BSP */
   17.66 -    if ( !(hvm_guest(bsp) && bsp->vcpu_id == 0) ) {
   17.67 -        printk("Not calling hvm_bringup_ap from BSP context.\n");
   17.68 +    BUG_ON(!hvm_guest(bsp));
   17.69 +
   17.70 +    if ( bsp->vcpu_id != 0 )
   17.71 +    {
   17.72 +        DPRINTK("Not calling hvm_bringup_ap from BSP context.\n");
   17.73          domain_crash_synchronous();
   17.74      }
   17.75  
   17.76      if ( (v = d->vcpu[vcpuid]) == NULL )
   17.77          return -ENOENT;
   17.78  
   17.79 -    if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL ) {
   17.80 -        printk("Failed to allocate memory in hvm_bringup_ap.\n");
   17.81 +    if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
   17.82 +    {
   17.83 +        DPRINTK("Failed to allocate memory in hvm_bringup_ap.\n");
   17.84          return -ENOMEM;
   17.85      }
   17.86  
   17.87 @@ -601,13 +625,15 @@ int hvm_bringup_ap(int vcpuid, int tramp
   17.88      UNLOCK_BIGLOCK(d);
   17.89  
   17.90      if ( rc != 0 )
   17.91 -        printk("AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc);
   17.92 -    else {
   17.93 -        if ( test_and_clear_bit(_VCPUF_down, &d->vcpu[vcpuid]->vcpu_flags) )
   17.94 -            vcpu_wake(d->vcpu[vcpuid]);
   17.95 -        printk("AP %d bringup suceeded.\n", vcpuid);
   17.96 +    {
   17.97 +        DPRINTK("AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc);
   17.98 +        return rc;
   17.99      }
  17.100  
  17.101 +    if ( test_and_clear_bit(_VCPUF_down, &d->vcpu[vcpuid]->vcpu_flags) )
  17.102 +        vcpu_wake(d->vcpu[vcpuid]);
   17.103 +    DPRINTK("AP %d bringup succeeded.\n", vcpuid);
  17.104 +
  17.105      xfree(ctxt);
  17.106  
  17.107      return rc;
    18.1 --- a/xen/arch/x86/hvm/i8259.c	Tue Sep 26 16:15:45 2006 -0600
    18.2 +++ b/xen/arch/x86/hvm/i8259.c	Tue Sep 26 19:11:33 2006 -0600
    18.3 @@ -447,6 +447,10 @@ static void pic_init1(int io_addr, int e
    18.4      ASSERT(spin_is_locked(&s->pics_state->lock));
    18.5  
    18.6      pic_reset(s);
    18.7 +
    18.8 +    /* XXX We set the ELCR to level triggered here, but that should
    18.9 +       really be done by the BIOS, and only for PCI IRQs. */
   18.10 +    s->elcr = 0xff & s->elcr_mask;
   18.11  }
   18.12  
   18.13  void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int),
   18.14 @@ -458,12 +462,12 @@ void pic_init(struct hvm_virpic *s, void
   18.15      spin_lock_init(&s->lock);
   18.16      s->pics[0].pics_state = s;
   18.17      s->pics[1].pics_state = s;
   18.18 +    s->pics[0].elcr_mask = 0xf8;
   18.19 +    s->pics[1].elcr_mask = 0xde;
   18.20      spin_lock_irqsave(&s->lock, flags);
   18.21      pic_init1(0x20, 0x4d0, &s->pics[0]);
   18.22      pic_init1(0xa0, 0x4d1, &s->pics[1]);
   18.23      spin_unlock_irqrestore(&s->lock, flags);
   18.24 -    s->pics[0].elcr_mask = 0xf8;
   18.25 -    s->pics[1].elcr_mask = 0xde;
   18.26      s->irq_request = irq_request;
   18.27      s->irq_request_opaque = irq_request_opaque;
   18.28  }
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/xen/arch/x86/hvm/instrlen.c	Tue Sep 26 19:11:33 2006 -0600
    19.3 @@ -0,0 +1,474 @@
    19.4 +/*
    19.5 + * instrlen.c - calculates the instruction length for all operating modes
    19.6 + * 
    19.7 + * Travis Betak, travis.betak@amd.com
    19.8 + * Copyright (c) 2005,2006 AMD
    19.9 + * Copyright (c) 2005 Keir Fraser
   19.10 + *
   19.11 + * Essentially a very, very stripped version of Keir Fraser's work in
   19.12 + * x86_emulate.c.  Used for MMIO.
   19.13 + */
   19.14 +
   19.15 +/*
   19.16 + * TODO: The way in which we use hvm_instruction_length is very inefficient as
   19.17 + * it now stands. It will be worthwhile to return the actual instruction buffer
   19.18 + * along with the instruction length since one of the reasons we are getting
   19.19 + * the instruction length is to know how many instruction bytes we need to
   19.20 + * fetch.
   19.21 + */
   19.22 +
   19.23 +#include <xen/config.h>
   19.24 +#include <xen/sched.h>
   19.25 +#include <xen/mm.h>
   19.26 +#include <asm/regs.h>
   19.27 +#include <asm-x86/x86_emulate.h>
   19.28 +
   19.29 +/* read from guest memory */
   19.30 +extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
   19.31 +        int length);
   19.32 +
   19.33 +/*
   19.34 + * Opcode effective-address decode tables.
   19.35 + * Note that we only emulate instructions that have at least one memory
   19.36 + * operand (excluding implicit stack references). We assume that stack
   19.37 + * references and instruction fetches will never occur in special memory
   19.38 + * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
   19.39 + * not be handled.
   19.40 + */
   19.41 +
   19.42 +/* Operand sizes: 8-bit operands or specified/overridden size. */
   19.43 +#define ByteOp      (1<<0) /* 8-bit operands. */
   19.44 +/* Destination operand type. */
   19.45 +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
   19.46 +#define DstReg      (2<<1) /* Register operand. */
   19.47 +#define DstMem      (3<<1) /* Memory operand. */
   19.48 +#define DstMask     (3<<1)
   19.49 +/* Source operand type. */
   19.50 +#define SrcNone     (0<<3) /* No source operand. */
   19.51 +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
   19.52 +#define SrcReg      (1<<3) /* Register operand. */
   19.53 +#define SrcMem      (2<<3) /* Memory operand. */
   19.54 +#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
   19.55 +#define SrcMem32    (4<<3) /* Memory operand (32-bit). */
   19.56 +#define SrcImm      (5<<3) /* Immediate operand. */
   19.57 +#define SrcImmByte  (6<<3) /* 8-bit sign-extended immediate operand. */
   19.58 +#define SrcMask     (7<<3)
   19.59 +/* Generic ModRM decode. */
   19.60 +#define ModRM       (1<<6)
   19.61 +/* Destination is only written; never read. */
   19.62 +#define Mov         (1<<7)
   19.63 +
   19.64 +static uint8_t opcode_table[256] = {
   19.65 +    /* 0x00 - 0x07 */
   19.66 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.67 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.68 +    0, 0, 0, 0,
   19.69 +    /* 0x08 - 0x0F */
   19.70 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.71 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.72 +    0, 0, 0, 0,
   19.73 +    /* 0x10 - 0x17 */
   19.74 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.75 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.76 +    0, 0, 0, 0,
   19.77 +    /* 0x18 - 0x1F */
   19.78 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.79 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.80 +    0, 0, 0, 0,
   19.81 +    /* 0x20 - 0x27 */
   19.82 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.83 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.84 +    0, 0, 0, 0,
   19.85 +    /* 0x28 - 0x2F */
   19.86 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.87 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.88 +    0, 0, 0, 0,
   19.89 +    /* 0x30 - 0x37 */
   19.90 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.91 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.92 +    0, 0, 0, 0,
   19.93 +    /* 0x38 - 0x3F */
   19.94 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   19.95 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   19.96 +    0, 0, 0, 0,
   19.97 +    /* 0x40 - 0x4F */
   19.98 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   19.99 +    /* 0x50 - 0x5F */
  19.100 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.101 +    /* 0x60 - 0x6F */
  19.102 +    0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */,
  19.103 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.104 +    /* 0x70 - 0x7F */
  19.105 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.106 +    /* 0x80 - 0x87 */
  19.107 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  19.108 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
  19.109 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  19.110 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  19.111 +    /* 0x88 - 0x8F */
  19.112 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  19.113 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
  19.114 +    0, 0, 0, DstMem|SrcNone|ModRM|Mov,
  19.115 +    /* 0x90 - 0x9F */
  19.116 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.117 +    /* 0xA0 - 0xA7 */
  19.118 +    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
  19.119 +    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
  19.120 +    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  19.121 +    ByteOp|ImplicitOps, ImplicitOps,
  19.122 +    /* 0xA8 - 0xAF */
  19.123 +    0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  19.124 +    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  19.125 +    ByteOp|ImplicitOps, ImplicitOps,
  19.126 +    /* 0xB0 - 0xBF */
  19.127 +    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
  19.128 +    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
  19.129 +    0, 0, 0, 0, 0, 0, 0, 0,
  19.130 +    /* 0xC0 - 0xC7 */
  19.131 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
  19.132 +    0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  19.133 +    /* 0xC8 - 0xCF */
  19.134 +    0, 0, 0, 0, 0, 0, 0, 0,
  19.135 +    /* 0xD0 - 0xD7 */
  19.136 +    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  19.137 +    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  19.138 +    0, 0, 0, 0,
  19.139 +    /* 0xD8 - 0xDF */
  19.140 +    0, 0, 0, 0, 0, 0, 0, 0,
  19.141 +    /* 0xE0 - 0xEF */
  19.142 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.143 +    /* 0xF0 - 0xF7 */
  19.144 +    0, 0, 0, 0,
  19.145 +    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
  19.146 +    /* 0xF8 - 0xFF */
  19.147 +    0, 0, 0, 0,
  19.148 +    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
  19.149 +};
  19.150 +
  19.151 +static uint8_t twobyte_table[256] = {
  19.152 +    /* 0x00 - 0x0F */
  19.153 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
  19.154 +    /* 0x10 - 0x1F */
  19.155 +    0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
  19.156 +    /* 0x20 - 0x2F */
  19.157 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.158 +    /* 0x30 - 0x3F */
  19.159 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.160 +    /* 0x40 - 0x47 */
  19.161 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.162 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.163 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.164 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.165 +    /* 0x48 - 0x4F */
  19.166 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.167 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.168 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.169 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  19.170 +    /* 0x50 - 0x5F */
  19.171 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.172 +    /* 0x60 - 0x6F */
  19.173 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.174 +    /* 0x70 - 0x7F */
  19.175 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.176 +    /* 0x80 - 0x8F */
  19.177 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.178 +    /* 0x90 - 0x9F */
  19.179 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.180 +    /* 0xA0 - 0xA7 */
  19.181 +    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
  19.182 +    /* 0xA8 - 0xAF */
  19.183 +    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
  19.184 +    /* 0xB0 - 0xB7 */
  19.185 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
  19.186 +    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
  19.187 +    /* 0xB8 - 0xBF */
  19.188 +    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
  19.189 +    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
  19.190 +    /* 0xC0 - 0xCF */
  19.191 +    0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
  19.192 +    /* 0xD0 - 0xDF */
  19.193 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.194 +    /* 0xE0 - 0xEF */
  19.195 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  19.196 +    /* 0xF0 - 0xFF */
  19.197 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  19.198 +};
  19.199 +
  19.200 +/* 
  19.201 + * insn_fetch - fetch the next 1 to 4 bytes from instruction stream 
  19.202 + * 
  19.203 + * @_type:   u8, u16, u32, s8, s16, or s32
  19.204 + * @_size:   1, 2, or 4 bytes
  19.205 + * @_eip:    address to fetch from guest memory
  19.206 + * @_length: increments the current instruction length counter by _size
  19.207 + *
  19.208 + * This is used internally by hvm_instruction_length to fetch the next byte,
   19.209 + * word, or dword from guest memory at location _eip.  We currently use a
   19.210 + * local unsigned long as the storage buffer, since at most 4 bytes are
   19.211 + * fetched at a time.
  19.212 + */
  19.213 +#define insn_fetch(_type, _size, _eip, _length)                         \
  19.214 +({  unsigned long _x;                                                   \
  19.215 +        if ((rc = inst_copy_from_guest((unsigned char *)(&(_x)),        \
  19.216 +                (unsigned long)(_eip), _size))                          \
  19.217 +                    != _size)                                           \
  19.218 +        goto done;                                                      \
  19.219 +    (_eip) += (_size);                                                  \
  19.220 +    (_length) += (_size);                                               \
  19.221 +    (_type)_x;                                                          \
  19.222 +})
  19.223 +
  19.224 +/**
   19.225 + * hvm_instruction_length - returns the length of the current instruction
  19.226 + *
  19.227 + * @regs: guest register state
  19.228 + * @mode: guest operating mode
  19.229 + *
  19.230 + * EXTERNAL this routine calculates the length of the current instruction
  19.231 + * pointed to by eip.  The guest state is _not_ changed by this routine.
  19.232 + */
  19.233 +int hvm_instruction_length(struct cpu_user_regs *regs, int mode)
  19.234 +{
  19.235 +    uint8_t b, d, twobyte = 0, rex_prefix = 0;
  19.236 +    uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
  19.237 +    unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
  19.238 +    int rc = 0;
  19.239 +    int length = 0;
  19.240 +    unsigned int tmp;
  19.241 +
  19.242 +    /* Shadow copy of register state. Committed on successful emulation. */
  19.243 +    struct cpu_user_regs _regs = *regs;
  19.244 +
  19.245 +    /* include CS for 16-bit modes */
  19.246 +    if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16)
  19.247 +        _regs.eip += (_regs.cs << 4);
  19.248 +
  19.249 +    switch ( mode )
  19.250 +    {
  19.251 +    case X86EMUL_MODE_REAL:
  19.252 +    case X86EMUL_MODE_PROT16:
  19.253 +        op_bytes = ad_bytes = 2;
  19.254 +        break;
  19.255 +    case X86EMUL_MODE_PROT32:
  19.256 +        op_bytes = ad_bytes = 4;
  19.257 +        break;
  19.258 +#ifdef __x86_64__
  19.259 +    case X86EMUL_MODE_PROT64:
  19.260 +        op_bytes = 4;
  19.261 +        ad_bytes = 8;
  19.262 +        break;
  19.263 +#endif
  19.264 +    default:
  19.265 +        return -1;
  19.266 +    }
  19.267 +
  19.268 +    /* Legacy prefixes. */
  19.269 +    for ( i = 0; i < 8; i++ )
  19.270 +    {
  19.271 +        switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) )
  19.272 +        {
  19.273 +        case 0x66: /* operand-size override */
  19.274 +            op_bytes ^= 6;      /* switch between 2/4 bytes */
  19.275 +            break;
  19.276 +        case 0x67: /* address-size override */
  19.277 +            if ( mode == X86EMUL_MODE_PROT64 )
  19.278 +                ad_bytes ^= 12; /* switch between 4/8 bytes */
  19.279 +            else
  19.280 +                ad_bytes ^= 6;  /* switch between 2/4 bytes */
  19.281 +            break;
  19.282 +        case 0x2e: /* CS override */
  19.283 +        case 0x3e: /* DS override */
  19.284 +        case 0x26: /* ES override */
  19.285 +        case 0x64: /* FS override */
  19.286 +        case 0x65: /* GS override */
  19.287 +        case 0x36: /* SS override */
  19.288 +            break;
  19.289 +        case 0xf0: /* LOCK */
  19.290 +            lock_prefix = 1;
  19.291 +            break;
  19.292 +        case 0xf3: /* REP/REPE/REPZ */
  19.293 +            rep_prefix = 1;
  19.294 +            break;
  19.295 +        case 0xf2: /* REPNE/REPNZ */
  19.296 +            break;
  19.297 +        default:
  19.298 +            goto done_prefixes;
  19.299 +        }
  19.300 +    }
  19.301 +done_prefixes:
  19.302 +
   19.303 +    /* Not quite the same as 80386 real mode, but hopefully good enough. */
  19.304 +    if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) {
  19.305 +        printf("sonofabitch!! we don't support 32-bit addresses in realmode\n");
  19.306 +        goto cannot_emulate;
  19.307 +    }
  19.308 +
  19.309 +    /* REX prefix. */
  19.310 +    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
  19.311 +    {
  19.312 +        rex_prefix = b;
  19.313 +        if ( b & 8 )
  19.314 +            op_bytes = 8;          /* REX.W */
  19.315 +        modrm_reg = (b & 4) << 1;  /* REX.R */
  19.316 +        /* REX.B and REX.X do not need to be decoded. */
  19.317 +        b = insn_fetch(uint8_t, 1, _regs.eip, length);
  19.318 +    }
  19.319 +
  19.320 +    /* Opcode byte(s). */
  19.321 +    d = opcode_table[b];
  19.322 +    if ( d == 0 )
  19.323 +    {
  19.324 +        /* Two-byte opcode? */
  19.325 +        if ( b == 0x0f )
  19.326 +        {
  19.327 +            twobyte = 1;
  19.328 +            b = insn_fetch(uint8_t, 1, _regs.eip, length);
  19.329 +            d = twobyte_table[b];
  19.330 +        }
  19.331 +
  19.332 +        /* Unrecognised? */
  19.333 +        if ( d == 0 )
  19.334 +            goto cannot_emulate;
  19.335 +    }
  19.336 +
  19.337 +    /* ModRM and SIB bytes. */
  19.338 +    if ( d & ModRM )
  19.339 +    {
  19.340 +        modrm = insn_fetch(uint8_t, 1, _regs.eip, length);
  19.341 +        modrm_mod |= (modrm & 0xc0) >> 6;
  19.342 +        modrm_reg |= (modrm & 0x38) >> 3;
  19.343 +        modrm_rm  |= (modrm & 0x07);
  19.344 +
  19.345 +        if ( modrm_mod == 3 )
  19.346 +        {
  19.347 +            DPRINTK("Cannot parse ModRM.mod == 3.\n");
  19.348 +            goto cannot_emulate;
  19.349 +        }
  19.350 +
  19.351 +        if ( ad_bytes == 2 )
  19.352 +        {
  19.353 +            /* 16-bit ModR/M decode. */
  19.354 +            switch ( modrm_mod )
  19.355 +            {
  19.356 +            case 0:
  19.357 +                if ( modrm_rm == 6 ) 
  19.358 +                {
  19.359 +                    length += 2;
  19.360 +                    _regs.eip += 2; /* skip disp16 */
  19.361 +                }
  19.362 +                break;
  19.363 +            case 1:
  19.364 +                length += 1;
  19.365 +                _regs.eip += 1; /* skip disp8 */
  19.366 +                break;
  19.367 +            case 2:
  19.368 +                length += 2;
  19.369 +                _regs.eip += 2; /* skip disp16 */
  19.370 +                break;
  19.371 +            }
  19.372 +        }
  19.373 +        else
  19.374 +        {
  19.375 +            /* 32/64-bit ModR/M decode. */
  19.376 +            switch ( modrm_mod )
  19.377 +            {
  19.378 +            case 0:
  19.379 +                if ( (modrm_rm == 4) && 
  19.380 +                     (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7) 
  19.381 +                        == 5) )
  19.382 +                {
  19.383 +                    length += 4;
  19.384 +                    _regs.eip += 4; /* skip disp32 specified by SIB.base */
  19.385 +                }
  19.386 +                else if ( modrm_rm == 5 )
  19.387 +                {
  19.388 +                    length += 4;
  19.389 +                    _regs.eip += 4; /* skip disp32 */
  19.390 +                }
  19.391 +                break;
  19.392 +            case 1:
  19.393 +                if ( modrm_rm == 4 )
  19.394 +                {
  19.395 +                    insn_fetch(uint8_t, 1, _regs.eip, length);
  19.396 +                }
  19.397 +                length += 1;
  19.398 +                _regs.eip += 1; /* skip disp8 */
  19.399 +                break;
  19.400 +            case 2:
  19.401 +                if ( modrm_rm == 4 )
  19.402 +                {
  19.403 +                    insn_fetch(uint8_t, 1, _regs.eip, length);
  19.404 +                }
  19.405 +                length += 4;
  19.406 +                _regs.eip += 4; /* skip disp32 */
  19.407 +                break;
  19.408 +            }
  19.409 +        }
  19.410 +    }
  19.411 +
  19.412 +    /* Decode and fetch the destination operand: register or memory. */
  19.413 +    switch ( d & DstMask )
  19.414 +    {
  19.415 +    case ImplicitOps:
  19.416 +        /* Special instructions do their own operand decoding. */
  19.417 +        goto done;
  19.418 +    }
  19.419 +
  19.420 +    /* Decode and fetch the source operand: register, memory or immediate. */
  19.421 +    switch ( d & SrcMask )
  19.422 +    {
  19.423 +    case SrcImm:
  19.424 +        tmp = (d & ByteOp) ? 1 : op_bytes;
  19.425 +        if ( tmp == 8 ) tmp = 4;
  19.426 +        /* NB. Immediates are sign-extended as necessary. */
  19.427 +        switch ( tmp )
  19.428 +        {
  19.429 +        case 1: insn_fetch(int8_t,  1, _regs.eip, length); break;
  19.430 +        case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
  19.431 +        case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
  19.432 +        }
  19.433 +        break;
  19.434 +    case SrcImmByte:
  19.435 +        insn_fetch(int8_t,  1, _regs.eip, length);
  19.436 +        break;
  19.437 +    }
  19.438 +
  19.439 +    if ( twobyte )
  19.440 +        goto done;
  19.441 +
  19.442 +    switch ( b )
  19.443 +    {
  19.444 +    case 0xa0 ... 0xa1: /* mov */
  19.445 +        length += ad_bytes;
  19.446 +        _regs.eip += ad_bytes; /* skip src displacement */
  19.447 +        break;
  19.448 +    case 0xa2 ... 0xa3: /* mov */
  19.449 +        length += ad_bytes;
  19.450 +        _regs.eip += ad_bytes; /* skip dst displacement */
  19.451 +        break;
  19.452 +    case 0xf6 ... 0xf7: /* Grp3 */
  19.453 +        switch ( modrm_reg )
  19.454 +        {
  19.455 +        case 0 ... 1: /* test */
  19.456 +            /* Special case in Grp3: test has an immediate source operand. */
  19.457 +            tmp = (d & ByteOp) ? 1 : op_bytes;
  19.458 +            if ( tmp == 8 ) tmp = 4;
  19.459 +            switch ( tmp )
  19.460 +            {
  19.461 +            case 1: insn_fetch(int8_t,  1, _regs.eip, length); break;
  19.462 +            case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
  19.463 +            case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
  19.464 +            }
  19.465 +            goto done;
  19.466 +        }
  19.467 +        break;
  19.468 +    }
  19.469 +
  19.470 +done:
  19.471 +    return length;
  19.472 +
  19.473 +cannot_emulate:
  19.474 +    DPRINTK("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
  19.475 +            b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
  19.476 +    return -1;
  19.477 +}
    20.1 --- a/xen/arch/x86/hvm/platform.c	Tue Sep 26 16:15:45 2006 -0600
    20.2 +++ b/xen/arch/x86/hvm/platform.c	Tue Sep 26 19:11:33 2006 -0600
    20.3 @@ -52,7 +52,7 @@ static inline long __get_reg_value(unsig
    20.4      case QUAD:
    20.5          return (long)(reg);
    20.6      default:
    20.7 -        printf("Error: (__get_reg_value) Invalid reg size\n");
    20.8 +        printk("Error: (__get_reg_value) Invalid reg size\n");
    20.9          domain_crash_synchronous();
   20.10      }
   20.11  }
   20.12 @@ -78,7 +78,7 @@ long get_reg_value(int size, int index, 
   20.13          case 7: /* %bh */
   20.14              return (char)((regs->rbx & 0xFF00) >> 8);
   20.15          default:
   20.16 -            printf("Error: (get_reg_value) Invalid index value\n");
   20.17 +            printk("Error: (get_reg_value) Invalid index value\n");
   20.18              domain_crash_synchronous();
   20.19          }
   20.20          /* NOTREACHED */
   20.21 @@ -102,7 +102,7 @@ long get_reg_value(int size, int index, 
   20.22      case 14: return __get_reg_value(regs->r14, size);
   20.23      case 15: return __get_reg_value(regs->r15, size);
   20.24      default:
   20.25 -        printf("Error: (get_reg_value) Invalid index value\n");
   20.26 +        printk("Error: (get_reg_value) Invalid index value\n");
   20.27          domain_crash_synchronous();
   20.28      }
   20.29  }
   20.30 @@ -115,7 +115,7 @@ static inline long __get_reg_value(unsig
   20.31      case LONG:
   20.32          return (int)(reg & 0xFFFFFFFF);
   20.33      default:
   20.34 -        printf("Error: (__get_reg_value) Invalid reg size\n");
   20.35 +        printk("Error: (__get_reg_value) Invalid reg size\n");
   20.36          domain_crash_synchronous();
   20.37      }
   20.38  }
   20.39 @@ -141,7 +141,7 @@ long get_reg_value(int size, int index, 
   20.40          case 7: /* %bh */
   20.41              return (char)((regs->ebx & 0xFF00) >> 8);
   20.42          default:
   20.43 -            printf("Error: (get_reg_value) Invalid index value\n");
   20.44 +            printk("Error: (get_reg_value) Invalid index value\n");
   20.45              domain_crash_synchronous();
   20.46          }
   20.47      }
   20.48 @@ -156,7 +156,7 @@ long get_reg_value(int size, int index, 
   20.49      case 6: return __get_reg_value(regs->esi, size);
   20.50      case 7: return __get_reg_value(regs->edi, size);
   20.51      default:
   20.52 -        printf("Error: (get_reg_value) Invalid index value\n");
   20.53 +        printk("Error: (get_reg_value) Invalid index value\n");
   20.54          domain_crash_synchronous();
   20.55      }
   20.56  }
   20.57 @@ -464,7 +464,7 @@ static int hvm_decode(int realmode, unsi
   20.58                      return DECODE_success;
   20.59  
   20.60                  default:
   20.61 -                    printf("%x/%x, This opcode isn't handled yet!\n",
   20.62 +                    printk("%x/%x, This opcode isn't handled yet!\n",
   20.63                             *opcode, ins_subtype);
   20.64                      return DECODE_failure;
   20.65              }
   20.66 @@ -614,7 +614,7 @@ static int hvm_decode(int realmode, unsi
   20.67          break;
   20.68  
   20.69      default:
   20.70 -        printf("%x, This opcode isn't handled yet!\n", *opcode);
   20.71 +        printk("%x, This opcode isn't handled yet!\n", *opcode);
   20.72          return DECODE_failure;
   20.73      }
   20.74  
   20.75 @@ -675,12 +675,12 @@ static int hvm_decode(int realmode, unsi
   20.76          }
   20.77          else
   20.78          {
   20.79 -            printf("0f %x, This opcode subtype isn't handled yet\n", *opcode);
   20.80 +            printk("0f %x, This opcode subtype isn't handled yet\n", *opcode);
   20.81              return DECODE_failure;
   20.82          }
   20.83  
   20.84      default:
   20.85 -        printf("0f %x, This opcode isn't handled yet\n", *opcode);
   20.86 +        printk("0f %x, This opcode isn't handled yet\n", *opcode);
   20.87          return DECODE_failure;
   20.88      }
   20.89  }
   20.90 @@ -702,7 +702,7 @@ static void hvm_send_assist_req(struct v
   20.91      if ( unlikely(p->state != STATE_INVALID) ) {
   20.92          /* This indicates a bug in the device model.  Crash the
   20.93             domain. */
   20.94 -        printf("Device model set bad IO state %d.\n", p->state);
   20.95 +        printk("Device model set bad IO state %d.\n", p->state);
   20.96          domain_crash(v->domain);
   20.97          return;
   20.98      }
   20.99 @@ -733,7 +733,7 @@ void send_pio_req(struct cpu_user_regs *
  20.100  
  20.101      p = &vio->vp_ioreq;
  20.102      if ( p->state != STATE_INVALID )
  20.103 -        printf("WARNING: send pio with something already pending (%d)?\n",
  20.104 +        printk("WARNING: send pio with something already pending (%d)?\n",
  20.105                 p->state);
  20.106      p->dir = dir;
  20.107      p->pdata_valid = pvalid;
  20.108 @@ -776,14 +776,14 @@ void send_mmio_req(
  20.109  
  20.110      vio = get_vio(v->domain, v->vcpu_id);
  20.111      if (vio == NULL) {
  20.112 -        printf("bad shared page\n");
  20.113 +        printk("bad shared page\n");
  20.114          domain_crash_synchronous();
  20.115      }
  20.116  
  20.117      p = &vio->vp_ioreq;
  20.118  
  20.119      if ( p->state != STATE_INVALID )
  20.120 -        printf("WARNING: send mmio with something already pending (%d)?\n",
  20.121 +        printk("WARNING: send mmio with something already pending (%d)?\n",
  20.122                 p->state);
  20.123      p->dir = dir;
  20.124      p->pdata_valid = pvalid;
  20.125 @@ -841,7 +841,7 @@ static void mmio_operands(int type, unsi
  20.126          else
  20.127              send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0);
  20.128      } else {
  20.129 -        printf("mmio_operands: invalid operand\n");
  20.130 +        printk("mmio_operands: invalid operand\n");
  20.131          domain_crash_synchronous();
  20.132      }
  20.133  }
  20.134 @@ -866,8 +866,10 @@ void handle_mmio(unsigned long va, unsig
  20.135      memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
  20.136      hvm_store_cpu_guest_regs(v, regs, NULL);
  20.137  
  20.138 -    if ((inst_len = hvm_instruction_length(v)) <= 0) {
  20.139 -        printf("handle_mmio: failed to get instruction length\n");
  20.140 +    inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v));
  20.141 +    if ( inst_len <= 0 )
  20.142 +    {
  20.143 +        printk("handle_mmio: failed to get instruction length\n");
  20.144          domain_crash_synchronous();
  20.145      }
  20.146  
  20.147 @@ -880,19 +882,19 @@ void handle_mmio(unsigned long va, unsig
  20.148      memset(inst, 0, MAX_INST_LEN);
  20.149      ret = inst_copy_from_guest(inst, inst_addr, inst_len);
  20.150      if (ret != inst_len) {
  20.151 -        printf("handle_mmio: failed to copy instruction\n");
  20.152 +        printk("handle_mmio: failed to copy instruction\n");
  20.153          domain_crash_synchronous();
  20.154      }
  20.155  
  20.156      init_instruction(&mmio_inst);
  20.157  
  20.158      if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) {
  20.159 -        printf("handle_mmio: failed to decode instruction\n");
  20.160 -        printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:",
  20.161 +        printk("handle_mmio: failed to decode instruction\n");
  20.162 +        printk("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:",
  20.163                 va, gpa, inst_len);
  20.164          for (i = 0; i < inst_len; i++)
  20.165 -            printf(" %02x", inst[i] & 0xFF);
  20.166 -        printf("\n");
  20.167 +            printk(" %02x", inst[i] & 0xFF);
  20.168 +        printk("\n");
  20.169          domain_crash_synchronous();
  20.170      }
  20.171  
  20.172 @@ -1073,7 +1075,7 @@ void handle_mmio(unsigned long va, unsig
  20.173          break;
  20.174  
  20.175      default:
  20.176 -        printf("Unhandled MMIO instruction\n");
  20.177 +        printk("Unhandled MMIO instruction\n");
  20.178          domain_crash_synchronous();
  20.179      }
  20.180  }
    21.1 --- a/xen/arch/x86/hvm/svm/Makefile	Tue Sep 26 16:15:45 2006 -0600
    21.2 +++ b/xen/arch/x86/hvm/svm/Makefile	Tue Sep 26 19:11:33 2006 -0600
    21.3 @@ -2,7 +2,6 @@ subdir-$(x86_32) += x86_32
    21.4  subdir-$(x86_64) += x86_64
    21.5  
    21.6  obj-y += emulate.o
    21.7 -obj-y += instrlen.o
    21.8  obj-y += intr.o
    21.9  obj-y += svm.o
   21.10  obj-y += vmcb.o
    22.1 --- a/xen/arch/x86/hvm/svm/instrlen.c	Tue Sep 26 16:15:45 2006 -0600
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,479 +0,0 @@
    22.4 -/*
    22.5 - * instrlen.c - calculates the instruction length for all operating modes
    22.6 - * 
    22.7 - * Travis Betak, travis.betak@amd.com
    22.8 - * Copyright (c) 2005,2006 AMD
    22.9 - * Copyright (c) 2005 Keir Fraser
   22.10 - *
   22.11 - * Essentially a very, very stripped version of Keir Fraser's work in
   22.12 - * x86_emulate.c.  Used for MMIO.
   22.13 - */
   22.14 -
   22.15 -/*
   22.16 - * TODO: the way in which we use svm_instrlen is very inefficient as is now
   22.17 - * stands.  It will be worth while to return the actual instruction buffer
   22.18 - * along with the instruction length since one of the reasons we are getting
   22.19 - * the instruction length is to know how many instruction bytes we need to
   22.20 - * fetch.
   22.21 - */
   22.22 -
   22.23 -#include <xen/config.h>
   22.24 -#include <xen/types.h>
   22.25 -#include <xen/lib.h>
   22.26 -#include <xen/mm.h>
   22.27 -#include <asm/regs.h>
   22.28 -#define DPRINTF DPRINTK
   22.29 -#include <asm-x86/x86_emulate.h>
   22.30 -
   22.31 -/* read from guest memory */
   22.32 -extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
   22.33 -        int length);
   22.34 -extern void svm_dump_inst(unsigned long eip);
   22.35 -
   22.36 -/*
   22.37 - * Opcode effective-address decode tables.
   22.38 - * Note that we only emulate instructions that have at least one memory
   22.39 - * operand (excluding implicit stack references). We assume that stack
   22.40 - * references and instruction fetches will never occur in special memory
   22.41 - * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
   22.42 - * not be handled.
   22.43 - */
   22.44 -
   22.45 -/* Operand sizes: 8-bit operands or specified/overridden size. */
   22.46 -#define ByteOp      (1<<0) /* 8-bit operands. */
   22.47 -/* Destination operand type. */
   22.48 -#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
   22.49 -#define DstReg      (2<<1) /* Register operand. */
   22.50 -#define DstMem      (3<<1) /* Memory operand. */
   22.51 -#define DstMask     (3<<1)
   22.52 -/* Source operand type. */
   22.53 -#define SrcNone     (0<<3) /* No source operand. */
   22.54 -#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
   22.55 -#define SrcReg      (1<<3) /* Register operand. */
   22.56 -#define SrcMem      (2<<3) /* Memory operand. */
   22.57 -#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
   22.58 -#define SrcMem32    (4<<3) /* Memory operand (32-bit). */
   22.59 -#define SrcImm      (5<<3) /* Immediate operand. */
   22.60 -#define SrcImmByte  (6<<3) /* 8-bit sign-extended immediate operand. */
   22.61 -#define SrcMask     (7<<3)
   22.62 -/* Generic ModRM decode. */
   22.63 -#define ModRM       (1<<6)
   22.64 -/* Destination is only written; never read. */
   22.65 -#define Mov         (1<<7)
   22.66 -
   22.67 -static uint8_t opcode_table[256] = {
   22.68 -    /* 0x00 - 0x07 */
   22.69 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.70 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.71 -    0, 0, 0, 0,
   22.72 -    /* 0x08 - 0x0F */
   22.73 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.74 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.75 -    0, 0, 0, 0,
   22.76 -    /* 0x10 - 0x17 */
   22.77 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.78 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.79 -    0, 0, 0, 0,
   22.80 -    /* 0x18 - 0x1F */
   22.81 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.82 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.83 -    0, 0, 0, 0,
   22.84 -    /* 0x20 - 0x27 */
   22.85 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.86 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.87 -    0, 0, 0, 0,
   22.88 -    /* 0x28 - 0x2F */
   22.89 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.90 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.91 -    0, 0, 0, 0,
   22.92 -    /* 0x30 - 0x37 */
   22.93 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.94 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.95 -    0, 0, 0, 0,
   22.96 -    /* 0x38 - 0x3F */
   22.97 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   22.98 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   22.99 -    0, 0, 0, 0,
  22.100 -    /* 0x40 - 0x4F */
  22.101 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.102 -    /* 0x50 - 0x5F */
  22.103 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.104 -    /* 0x60 - 0x6F */
  22.105 -    0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */,
  22.106 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.107 -    /* 0x70 - 0x7F */
  22.108 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.109 -    /* 0x80 - 0x87 */
  22.110 -    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  22.111 -    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
  22.112 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  22.113 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  22.114 -    /* 0x88 - 0x8F */
  22.115 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  22.116 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
  22.117 -    0, 0, 0, DstMem|SrcNone|ModRM|Mov,
  22.118 -    /* 0x90 - 0x9F */
  22.119 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.120 -    /* 0xA0 - 0xA7 */
  22.121 -    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
  22.122 -    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
  22.123 -    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  22.124 -    ByteOp|ImplicitOps, ImplicitOps,
  22.125 -    /* 0xA8 - 0xAF */
  22.126 -    0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  22.127 -    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  22.128 -    ByteOp|ImplicitOps, ImplicitOps,
  22.129 -    /* 0xB0 - 0xBF */
  22.130 -    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
  22.131 -    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
  22.132 -    0, 0, 0, 0, 0, 0, 0, 0,
  22.133 -    /* 0xC0 - 0xC7 */
  22.134 -    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
  22.135 -    0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  22.136 -    /* 0xC8 - 0xCF */
  22.137 -    0, 0, 0, 0, 0, 0, 0, 0,
  22.138 -    /* 0xD0 - 0xD7 */
  22.139 -    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  22.140 -    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  22.141 -    0, 0, 0, 0,
  22.142 -    /* 0xD8 - 0xDF */
  22.143 -    0, 0, 0, 0, 0, 0, 0, 0,
  22.144 -    /* 0xE0 - 0xEF */
  22.145 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.146 -    /* 0xF0 - 0xF7 */
  22.147 -    0, 0, 0, 0,
  22.148 -    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
  22.149 -    /* 0xF8 - 0xFF */
  22.150 -    0, 0, 0, 0,
  22.151 -    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
  22.152 -};
  22.153 -
  22.154 -static uint8_t twobyte_table[256] = {
  22.155 -    /* 0x00 - 0x0F */
  22.156 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
  22.157 -    /* 0x10 - 0x1F */
  22.158 -    0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
  22.159 -    /* 0x20 - 0x2F */
  22.160 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.161 -    /* 0x30 - 0x3F */
  22.162 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.163 -    /* 0x40 - 0x47 */
  22.164 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.165 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.166 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.167 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.168 -    /* 0x48 - 0x4F */
  22.169 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.170 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.171 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.172 -    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  22.173 -    /* 0x50 - 0x5F */
  22.174 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.175 -    /* 0x60 - 0x6F */
  22.176 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.177 -    /* 0x70 - 0x7F */
  22.178 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.179 -    /* 0x80 - 0x8F */
  22.180 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.181 -    /* 0x90 - 0x9F */
  22.182 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.183 -    /* 0xA0 - 0xA7 */
  22.184 -    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
  22.185 -    /* 0xA8 - 0xAF */
  22.186 -    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
  22.187 -    /* 0xB0 - 0xB7 */
  22.188 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
  22.189 -    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
  22.190 -    /* 0xB8 - 0xBF */
  22.191 -    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
  22.192 -    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
  22.193 -    /* 0xC0 - 0xCF */
  22.194 -    0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
  22.195 -    /* 0xD0 - 0xDF */
  22.196 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.197 -    /* 0xE0 - 0xEF */
  22.198 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  22.199 -    /* 0xF0 - 0xFF */
  22.200 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  22.201 -};
  22.202 -
  22.203 -/* 
  22.204 - * insn_fetch - fetch the next 1 to 4 bytes from instruction stream 
  22.205 - * 
  22.206 - * @_type:   u8, u16, u32, s8, s16, or s32
  22.207 - * @_size:   1, 2, or 4 bytes
  22.208 - * @_eip:    address to fetch from guest memory
  22.209 - * @_length: updated! increments the current instruction length counter by _size
  22.210 - *
  22.211 - * INTERNAL this is used internally by svm_instrlen to fetch the next byte,
  22.212 - * word, or dword from guest memory at location _eip.  we currently use a local
  22.213 - * unsigned long as the storage buffer since the most bytes we're gonna get
  22.214 - * is limited to 4.
  22.215 - */
  22.216 -#define insn_fetch(_type, _size, _eip, _length) \
  22.217 -({  unsigned long _x; \
  22.218 -        if ((rc = inst_copy_from_guest((unsigned char *)(&(_x)), \
  22.219 -                (unsigned long)(_eip), _size)) \
  22.220 -                    != _size) \
  22.221 -        goto done; \
  22.222 -    (_eip) += (_size); \
  22.223 -    (_length) += (_size); \
  22.224 -    (_type)_x; \
  22.225 -})
  22.226 -
  22.227 -
  22.228 -/**
  22.229 - * svn_instrlen - returns the current instructions length
  22.230 - *
  22.231 - * @regs: guest register state
  22.232 - * @mode: guest operating mode
  22.233 - *
  22.234 - * EXTERNAL this routine calculates the length of the current instruction
  22.235 - * pointed to by eip.  The guest state is _not_ changed by this routine.
  22.236 - */
  22.237 -int svm_instrlen(struct cpu_user_regs *regs, int mode)
  22.238 -{
  22.239 -    uint8_t b, d, twobyte = 0, rex_prefix = 0;
  22.240 -    uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
  22.241 -    unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
  22.242 -    int rc = 0;
  22.243 -    int length = 0;
  22.244 -    unsigned int tmp;
  22.245 -
  22.246 -    /* Shadow copy of register state. Committed on successful emulation. */
  22.247 -    struct cpu_user_regs _regs = *regs;
  22.248 -
  22.249 -    /* include CS for 16-bit modes */
  22.250 -    if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16)
  22.251 -        _regs.eip += (_regs.cs << 4);
  22.252 -
  22.253 -    switch ( mode )
  22.254 -    {
  22.255 -    case X86EMUL_MODE_REAL:
  22.256 -    case X86EMUL_MODE_PROT16:
  22.257 -        op_bytes = ad_bytes = 2;
  22.258 -        break;
  22.259 -    case X86EMUL_MODE_PROT32:
  22.260 -        op_bytes = ad_bytes = 4;
  22.261 -        break;
  22.262 -#ifdef __x86_64__
  22.263 -    case X86EMUL_MODE_PROT64:
  22.264 -        op_bytes = 4;
  22.265 -        ad_bytes = 8;
  22.266 -        break;
  22.267 -#endif
  22.268 -    default:
  22.269 -        return -1;
  22.270 -    }
  22.271 -
  22.272 -    /* Legacy prefixes. */
  22.273 -    for ( i = 0; i < 8; i++ )
  22.274 -    {
  22.275 -        switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) )
  22.276 -        {
  22.277 -        case 0x66: /* operand-size override */
  22.278 -            op_bytes ^= 6;      /* switch between 2/4 bytes */
  22.279 -            break;
  22.280 -        case 0x67: /* address-size override */
  22.281 -            if ( mode == X86EMUL_MODE_PROT64 )
  22.282 -                ad_bytes ^= 12; /* switch between 4/8 bytes */
  22.283 -            else
  22.284 -                ad_bytes ^= 6;  /* switch between 2/4 bytes */
  22.285 -            break;
  22.286 -        case 0x2e: /* CS override */
  22.287 -        case 0x3e: /* DS override */
  22.288 -        case 0x26: /* ES override */
  22.289 -        case 0x64: /* FS override */
  22.290 -        case 0x65: /* GS override */
  22.291 -        case 0x36: /* SS override */
  22.292 -            break;
  22.293 -        case 0xf0: /* LOCK */
  22.294 -            lock_prefix = 1;
  22.295 -            break;
  22.296 -        case 0xf3: /* REP/REPE/REPZ */
  22.297 -            rep_prefix = 1;
  22.298 -            break;
  22.299 -        case 0xf2: /* REPNE/REPNZ */
  22.300 -            break;
  22.301 -        default:
  22.302 -            goto done_prefixes;
  22.303 -        }
  22.304 -    }
  22.305 -done_prefixes:
  22.306 -
  22.307 -    /* Note quite the same as 80386 real mode, but hopefully good enough. */
  22.308 -    if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) {
  22.309 -        printf("sonofabitch!! we don't support 32-bit addresses in realmode\n");
  22.310 -        goto cannot_emulate;
  22.311 -    }
  22.312 -
  22.313 -    /* REX prefix. */
  22.314 -    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
  22.315 -    {
  22.316 -        rex_prefix = b;
  22.317 -        if ( b & 8 )
  22.318 -            op_bytes = 8;          /* REX.W */
  22.319 -        modrm_reg = (b & 4) << 1;  /* REX.R */
  22.320 -        /* REX.B and REX.X do not need to be decoded. */
  22.321 -        b = insn_fetch(uint8_t, 1, _regs.eip, length);
  22.322 -    }
  22.323 -
  22.324 -    /* Opcode byte(s). */
  22.325 -    d = opcode_table[b];
  22.326 -    if ( d == 0 )
  22.327 -    {
  22.328 -        /* Two-byte opcode? */
  22.329 -        if ( b == 0x0f )
  22.330 -        {
  22.331 -            twobyte = 1;
  22.332 -            b = insn_fetch(uint8_t, 1, _regs.eip, length);
  22.333 -            d = twobyte_table[b];
  22.334 -        }
  22.335 -
  22.336 -        /* Unrecognised? */
  22.337 -        if ( d == 0 )
  22.338 -            goto cannot_emulate;
  22.339 -    }
  22.340 -
  22.341 -    /* ModRM and SIB bytes. */
  22.342 -    if ( d & ModRM )
  22.343 -    {
  22.344 -        modrm = insn_fetch(uint8_t, 1, _regs.eip, length);
  22.345 -        modrm_mod |= (modrm & 0xc0) >> 6;
  22.346 -        modrm_reg |= (modrm & 0x38) >> 3;
  22.347 -        modrm_rm  |= (modrm & 0x07);
  22.348 -
  22.349 -        if ( modrm_mod == 3 )
  22.350 -        {
  22.351 -            DPRINTF("Cannot parse ModRM.mod == 3.\n");
  22.352 -            goto cannot_emulate;
  22.353 -        }
  22.354 -
  22.355 -        if ( ad_bytes == 2 )
  22.356 -        {
  22.357 -            /* 16-bit ModR/M decode. */
  22.358 -            switch ( modrm_mod )
  22.359 -            {
  22.360 -            case 0:
  22.361 -                if ( modrm_rm == 6 ) 
  22.362 -                {
  22.363 -                    length += 2;
  22.364 -                    _regs.eip += 2; /* skip disp16 */
  22.365 -                }
  22.366 -                break;
  22.367 -            case 1:
  22.368 -                length += 1;
  22.369 -                _regs.eip += 1; /* skip disp8 */
  22.370 -                break;
  22.371 -            case 2:
  22.372 -                length += 2;
  22.373 -                _regs.eip += 2; /* skip disp16 */
  22.374 -                break;
  22.375 -            }
  22.376 -        }
  22.377 -        else
  22.378 -        {
  22.379 -            /* 32/64-bit ModR/M decode. */
  22.380 -            switch ( modrm_mod )
  22.381 -            {
  22.382 -            case 0:
  22.383 -                if ( (modrm_rm == 4) && 
  22.384 -                     (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7) 
  22.385 -                        == 5) )
  22.386 -                {
  22.387 -                    length += 4;
  22.388 -                    _regs.eip += 4; /* skip disp32 specified by SIB.base */
  22.389 -                }
  22.390 -                else if ( modrm_rm == 5 )
  22.391 -                {
  22.392 -                    length += 4;
  22.393 -                    _regs.eip += 4; /* skip disp32 */
  22.394 -                }
  22.395 -                break;
  22.396 -            case 1:
  22.397 -                if ( modrm_rm == 4 )
  22.398 -                {
  22.399 -                    insn_fetch(uint8_t, 1, _regs.eip, length);
  22.400 -                }
  22.401 -                length += 1;
  22.402 -                _regs.eip += 1; /* skip disp8 */
  22.403 -                break;
  22.404 -            case 2:
  22.405 -                if ( modrm_rm == 4 )
  22.406 -                {
  22.407 -                    insn_fetch(uint8_t, 1, _regs.eip, length);
  22.408 -                }
  22.409 -                length += 4;
  22.410 -                _regs.eip += 4; /* skip disp32 */
  22.411 -                break;
  22.412 -            }
  22.413 -        }
  22.414 -    }
  22.415 -
  22.416 -    /* Decode and fetch the destination operand: register or memory. */
  22.417 -    switch ( d & DstMask )
  22.418 -    {
  22.419 -    case ImplicitOps:
  22.420 -        /* Special instructions do their own operand decoding. */
  22.421 -        goto done;
  22.422 -    }
  22.423 -
  22.424 -    /* Decode and fetch the source operand: register, memory or immediate. */
  22.425 -    switch ( d & SrcMask )
  22.426 -    {
  22.427 -    case SrcImm:
  22.428 -        tmp = (d & ByteOp) ? 1 : op_bytes;
  22.429 -        if ( tmp == 8 ) tmp = 4;
  22.430 -        /* NB. Immediates are sign-extended as necessary. */
  22.431 -        switch ( tmp )
  22.432 -        {
  22.433 -        case 1: insn_fetch(int8_t,  1, _regs.eip, length); break;
  22.434 -        case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
  22.435 -        case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
  22.436 -        }
  22.437 -        break;
  22.438 -    case SrcImmByte:
  22.439 -        insn_fetch(int8_t,  1, _regs.eip, length);
  22.440 -        break;
  22.441 -    }
  22.442 -
  22.443 -    if ( twobyte )
  22.444 -        goto done;
  22.445 -
  22.446 -    switch ( b )
  22.447 -    {
  22.448 -    case 0xa0 ... 0xa1: /* mov */
  22.449 -        length += ad_bytes;
  22.450 -        _regs.eip += ad_bytes; /* skip src displacement */
  22.451 -        break;
  22.452 -    case 0xa2 ... 0xa3: /* mov */
  22.453 -        length += ad_bytes;
  22.454 -        _regs.eip += ad_bytes; /* skip dst displacement */
  22.455 -        break;
  22.456 -    case 0xf6 ... 0xf7: /* Grp3 */
  22.457 -        switch ( modrm_reg )
  22.458 -        {
  22.459 -        case 0 ... 1: /* test */
  22.460 -            /* Special case in Grp3: test has an immediate source operand. */
  22.461 -            tmp = (d & ByteOp) ? 1 : op_bytes;
  22.462 -            if ( tmp == 8 ) tmp = 4;
  22.463 -            switch ( tmp )
  22.464 -            {
  22.465 -            case 1: insn_fetch(int8_t,  1, _regs.eip, length); break;
  22.466 -            case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
  22.467 -            case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
  22.468 -            }
  22.469 -            goto done;
  22.470 -        }
  22.471 -        break;
  22.472 -    }
  22.473 -
  22.474 -done:
  22.475 -    return length;
  22.476 -
  22.477 -cannot_emulate:
  22.478 -    DPRINTF("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
  22.479 -            b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
  22.480 -    svm_dump_inst(_regs.eip);
  22.481 -    return -1;
  22.482 -}
    23.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Sep 26 16:15:45 2006 -0600
    23.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Sep 26 19:11:33 2006 -0600
    23.3 @@ -44,6 +44,7 @@
    23.4  #include <asm/hvm/svm/emulate.h>
    23.5  #include <asm/hvm/svm/vmmcall.h>
    23.6  #include <asm/hvm/svm/intr.h>
    23.7 +#include <asm/x86_emulate.h>
    23.8  #include <public/sched.h>
    23.9  
   23.10  #define SVM_EXTRA_DEBUG
   23.11 @@ -60,7 +61,6 @@ extern int inst_copy_from_guest(unsigned
   23.12  extern asmlinkage void do_IRQ(struct cpu_user_regs *);
   23.13  extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
   23.14                           unsigned long count, int size, long value, int dir, int pvalid);
   23.15 -extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
   23.16  extern void svm_dump_inst(unsigned long eip);
   23.17  extern int svm_dbg_on;
   23.18  void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
   23.19 @@ -468,21 +468,19 @@ static int svm_realmode(struct vcpu *v)
   23.20      return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
   23.21  }
   23.22  
   23.23 -int svm_guest_x86_mode(struct vcpu *v)
   23.24 +static int svm_guest_x86_mode(struct vcpu *v)
   23.25  {
   23.26      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   23.27 -    unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
   23.28 -    /* check which operating mode the guest is running */
   23.29 -    if( vmcb->efer & EFER_LMA )
   23.30 -        mode = vmcb->cs.attributes.fields.l ? 8 : 4;
   23.31 -    else
   23.32 -        mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
   23.33 -    return mode;
   23.34 -}
   23.35 -
   23.36 -int svm_instruction_length(struct vcpu *v)
   23.37 -{
   23.38 -    return svm_instrlen(guest_cpu_user_regs(), svm_guest_x86_mode(v));
   23.39 +
   23.40 +    if ( vmcb->efer & EFER_LMA )
   23.41 +        return (vmcb->cs.attributes.fields.l ?
   23.42 +                X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
   23.43 +
   23.44 +    if ( svm_realmode(v) )
   23.45 +        return X86EMUL_MODE_REAL;
   23.46 +
   23.47 +    return (vmcb->cs.attributes.fields.db ?
   23.48 +            X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
   23.49  }
   23.50  
   23.51  void svm_update_host_cr3(struct vcpu *v)
   23.52 @@ -878,7 +876,6 @@ int start_svm(void)
   23.53      hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
   23.54      hvm_funcs.pae_enabled = svm_pae_enabled;
   23.55      hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
   23.56 -    hvm_funcs.instruction_length = svm_instruction_length;
   23.57      hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
   23.58  
   23.59      hvm_funcs.update_host_cr3 = svm_update_host_cr3;
    24.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue Sep 26 16:15:45 2006 -0600
    24.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue Sep 26 19:11:33 2006 -0600
    24.3 @@ -37,36 +37,119 @@
    24.4  #include <xen/keyhandler.h>
    24.5  #include <asm/shadow.h>
    24.6  
    24.7 -static int vmcs_size;
    24.8 -static int vmcs_order;
    24.9 +/* Basic flags for Pin-based VM-execution controls. */
   24.10 +#define MONITOR_PIN_BASED_EXEC_CONTROLS                 \
   24.11 +    ( PIN_BASED_EXT_INTR_MASK |                         \
   24.12 +      PIN_BASED_NMI_EXITING )
   24.13 +
   24.14 +/* Basic flags for CPU-based VM-execution controls. */
   24.15 +#ifdef __x86_64__
   24.16 +#define MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH         \
   24.17 +    ( CPU_BASED_CR8_LOAD_EXITING |                      \
   24.18 +      CPU_BASED_CR8_STORE_EXITING )
   24.19 +#else
   24.20 +#define MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH 0
   24.21 +#endif
   24.22 +#define MONITOR_CPU_BASED_EXEC_CONTROLS                 \
   24.23 +    ( MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH |         \
   24.24 +      CPU_BASED_HLT_EXITING |                           \
   24.25 +      CPU_BASED_INVDPG_EXITING |                        \
   24.26 +      CPU_BASED_MWAIT_EXITING |                         \
   24.27 +      CPU_BASED_MOV_DR_EXITING |                        \
   24.28 +      CPU_BASED_ACTIVATE_IO_BITMAP |                    \
   24.29 +      CPU_BASED_USE_TSC_OFFSETING )
   24.30 +
   24.31 +/* Basic flags for VM-Exit controls. */
   24.32 +#ifdef __x86_64__
   24.33 +#define MONITOR_VM_EXIT_CONTROLS_SUBARCH VM_EXIT_IA32E_MODE
   24.34 +#else
   24.35 +#define MONITOR_VM_EXIT_CONTROLS_SUBARCH 0
   24.36 +#endif
   24.37 +#define MONITOR_VM_EXIT_CONTROLS                        \
   24.38 +    ( MONITOR_VM_EXIT_CONTROLS_SUBARCH |                \
   24.39 +      VM_EXIT_ACK_INTR_ON_EXIT )
   24.40 +
   24.41 +/* Basic flags for VM-Entry controls. */
   24.42 +#define MONITOR_VM_ENTRY_CONTROLS                       0x00000000
   24.43 +
   24.44 +/* Dynamic (run-time adjusted) execution control flags. */
   24.45 +static u32 vmx_pin_based_exec_control;
   24.46 +static u32 vmx_cpu_based_exec_control;
   24.47 +static u32 vmx_vmexit_control;
   24.48 +static u32 vmx_vmentry_control;
   24.49 +
   24.50  static u32 vmcs_revision_id;
   24.51  
   24.52 +static u32 adjust_vmx_controls(u32 ctrls, u32 msr)
   24.53 +{
   24.54 +    u32 vmx_msr_low, vmx_msr_high;
   24.55 +
   24.56 +    rdmsr(msr, vmx_msr_low, vmx_msr_high);
   24.57 +
   24.58 +    /* Bit == 0 means must be zero. */
   24.59 +    BUG_ON(ctrls & ~vmx_msr_high);
   24.60 +
   24.61 +    /* Bit == 1 means must be one. */
   24.62 +    ctrls |= vmx_msr_low;
   24.63 +
   24.64 +    return ctrls;
   24.65 +}
   24.66 +
   24.67  void vmx_init_vmcs_config(void)
   24.68  {
   24.69      u32 vmx_msr_low, vmx_msr_high;
   24.70 +    u32 _vmx_pin_based_exec_control;
   24.71 +    u32 _vmx_cpu_based_exec_control;
   24.72 +    u32 _vmx_vmexit_control;
   24.73 +    u32 _vmx_vmentry_control;
   24.74  
   24.75 -    if ( vmcs_size )
   24.76 -        return;
   24.77 +    _vmx_pin_based_exec_control =
   24.78 +        adjust_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
   24.79 +                            MSR_IA32_VMX_PINBASED_CTLS_MSR);
   24.80 +    _vmx_cpu_based_exec_control =
   24.81 +        adjust_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
   24.82 +                            MSR_IA32_VMX_PROCBASED_CTLS_MSR);
   24.83 +    _vmx_vmexit_control =
   24.84 +        adjust_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
   24.85 +                            MSR_IA32_VMX_EXIT_CTLS_MSR);
   24.86 +    _vmx_vmentry_control =
   24.87 +        adjust_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
   24.88 +                            MSR_IA32_VMX_ENTRY_CTLS_MSR);
   24.89  
   24.90      rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
   24.91  
   24.92 -    vmcs_revision_id = vmx_msr_low;
   24.93 +    if ( smp_processor_id() == 0 )
   24.94 +    {
   24.95 +        vmcs_revision_id = vmx_msr_low;
   24.96 +        vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
   24.97 +        vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
   24.98 +        vmx_vmexit_control         = _vmx_vmexit_control;
   24.99 +        vmx_vmentry_control        = _vmx_vmentry_control;
  24.100 +    }
  24.101 +    else
  24.102 +    {
  24.103 +        BUG_ON(vmcs_revision_id != vmx_msr_low);
  24.104 +        BUG_ON(vmx_pin_based_exec_control != _vmx_pin_based_exec_control);
  24.105 +        BUG_ON(vmx_cpu_based_exec_control != _vmx_cpu_based_exec_control);
  24.106 +        BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
  24.107 +        BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
  24.108 +    }
  24.109  
  24.110 -    vmcs_size  = vmx_msr_high & 0x1fff;
  24.111 -    vmcs_order = get_order_from_bytes(vmcs_size);
  24.112 +    /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
  24.113 +    BUG_ON((vmx_msr_high & 0x1fff) > PAGE_SIZE);
  24.114  }
  24.115  
  24.116  static struct vmcs_struct *vmx_alloc_vmcs(void)
  24.117  {
  24.118      struct vmcs_struct *vmcs;
  24.119  
  24.120 -    if ( (vmcs = alloc_xenheap_pages(vmcs_order)) == NULL )
  24.121 +    if ( (vmcs = alloc_xenheap_page()) == NULL )
  24.122      {
  24.123          DPRINTK("Failed to allocate VMCS.\n");
  24.124          return NULL;
  24.125      }
  24.126  
  24.127 -    memset(vmcs, 0, vmcs_size); /* don't remove this */
  24.128 +    memset(vmcs, 0, PAGE_SIZE);
  24.129      vmcs->vmcs_revision_id = vmcs_revision_id;
  24.130  
  24.131      return vmcs;
  24.132 @@ -74,7 +157,7 @@ static struct vmcs_struct *vmx_alloc_vmc
  24.133  
  24.134  static void vmx_free_vmcs(struct vmcs_struct *vmcs)
  24.135  {
  24.136 -    free_xenheap_pages(vmcs, vmcs_order);
  24.137 +    free_xenheap_page(vmcs);
  24.138  }
  24.139  
  24.140  static void __vmx_clear_vmcs(void *info)
  24.141 @@ -156,12 +239,11 @@ static inline int construct_vmcs_control
  24.142  {
  24.143      int error = 0;
  24.144  
  24.145 -    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
  24.146 -                       MONITOR_PIN_BASED_EXEC_CONTROLS);
  24.147 +    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
  24.148  
  24.149 -    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
  24.150 +    error |= __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
  24.151  
  24.152 -    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
  24.153 +    error |= __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
  24.154  
  24.155      error |= __vmwrite(IO_BITMAP_A, virt_to_maddr(arch_vmx->io_bitmap_a));
  24.156      error |= __vmwrite(IO_BITMAP_B, virt_to_maddr(arch_vmx->io_bitmap_b));
  24.157 @@ -246,9 +328,8 @@ static void vmx_do_launch(struct vcpu *v
  24.158      error |= __vmwrite(GUEST_CR0, cr0);
  24.159      cr0 &= ~X86_CR0_PG;
  24.160      error |= __vmwrite(CR0_READ_SHADOW, cr0);
  24.161 -    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
  24.162 -                       MONITOR_CPU_BASED_EXEC_CONTROLS);
  24.163 -    v->arch.hvm_vcpu.u.vmx.exec_control = MONITOR_CPU_BASED_EXEC_CONTROLS;
  24.164 +    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
  24.165 +    v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
  24.166  
  24.167      __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
  24.168  
  24.169 @@ -297,22 +378,22 @@ static inline int construct_init_vmcs_gu
  24.170      /* MSR */
  24.171      error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
  24.172      error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
  24.173 -
  24.174      error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
  24.175      error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
  24.176      error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
  24.177 -    /* interrupt */
  24.178 +
  24.179      error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
  24.180 -    /* mask */
  24.181 -    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
  24.182 -    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);
  24.183 +
  24.184 +    error |= __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
  24.185 +    error |= __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
  24.186  
  24.187      error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
  24.188      error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
  24.189  
  24.190 -    /* TSC */
  24.191      error |= __vmwrite(CR3_TARGET_COUNT, 0);
  24.192  
  24.193 +    error |= __vmwrite(GUEST_ACTIVITY_STATE, 0);
  24.194 +
  24.195      /* Guest Selectors */
  24.196      error |= __vmwrite(GUEST_ES_SELECTOR, GUEST_LAUNCH_DS);
  24.197      error |= __vmwrite(GUEST_SS_SELECTOR, GUEST_LAUNCH_DS);
    25.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Sep 26 16:15:45 2006 -0600
    25.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Sep 26 19:11:33 2006 -0600
    25.3 @@ -45,6 +45,7 @@
    25.4  #include <public/hvm/ioreq.h>
    25.5  #include <asm/hvm/vpic.h>
    25.6  #include <asm/hvm/vlapic.h>
    25.7 +#include <asm/x86_emulate.h>
    25.8  
    25.9  extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
   25.10  
   25.11 @@ -593,15 +594,6 @@ static void vmx_load_cpu_guest_regs(stru
   25.12      vmx_vmcs_exit(v);
   25.13  }
   25.14  
   25.15 -static int vmx_instruction_length(struct vcpu *v)
   25.16 -{
   25.17 -    unsigned long inst_len;
   25.18 -
   25.19 -    if ( __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len) ) /* XXX Unsafe XXX */
   25.20 -        return 0;
   25.21 -    return inst_len;
   25.22 -}
   25.23 -
   25.24  static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
   25.25  {
   25.26      switch ( num )
   25.27 @@ -692,21 +684,6 @@ static void vmx_init_ap_context(struct v
   25.28  
   25.29  void do_nmi(struct cpu_user_regs *);
   25.30  
   25.31 -static int check_vmx_controls(u32 ctrls, u32 msr)
   25.32 -{
   25.33 -    u32 vmx_msr_low, vmx_msr_high;
   25.34 -
   25.35 -    rdmsr(msr, vmx_msr_low, vmx_msr_high);
   25.36 -    if ( (ctrls < vmx_msr_low) || (ctrls > vmx_msr_high) )
   25.37 -    {
   25.38 -        printk("Insufficient VMX capability 0x%x, "
   25.39 -               "msr=0x%x,low=0x%8x,high=0x%x\n",
   25.40 -               ctrls, msr, vmx_msr_low, vmx_msr_high);
   25.41 -        return 0;
   25.42 -    }
   25.43 -    return 1;
   25.44 -}
   25.45 -
   25.46  static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
   25.47  {
   25.48      char *p;
   25.49 @@ -729,6 +706,35 @@ static void vmx_init_hypercall_page(stru
   25.50      *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
   25.51  }
   25.52  
   25.53 +static int vmx_realmode(struct vcpu *v)
   25.54 +{
   25.55 +    unsigned long rflags;
   25.56 +
   25.57 +    ASSERT(v == current);
   25.58 +
   25.59 +    __vmread(GUEST_RFLAGS, &rflags);
   25.60 +    return rflags & X86_EFLAGS_VM;
   25.61 +}
   25.62 +
   25.63 +static int vmx_guest_x86_mode(struct vcpu *v)
   25.64 +{
   25.65 +    unsigned long cs_ar_bytes;
   25.66 +
   25.67 +    ASSERT(v == current);
   25.68 +
   25.69 +    __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes);
   25.70 +
   25.71 +    if ( vmx_long_mode_enabled(v) )
   25.72 +        return ((cs_ar_bytes & (1u<<13)) ?
   25.73 +                X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
   25.74 +
   25.75 +    if ( vmx_realmode(v) )
   25.76 +        return X86EMUL_MODE_REAL;
   25.77 +
   25.78 +    return ((cs_ar_bytes & (1u<<14)) ?
   25.79 +            X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
   25.80 +}
   25.81 +
   25.82  /* Setup HVM interfaces */
   25.83  static void vmx_setup_hvm_funcs(void)
   25.84  {
   25.85 @@ -748,7 +754,6 @@ static void vmx_setup_hvm_funcs(void)
   25.86      hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
   25.87      hvm_funcs.pae_enabled = vmx_pae_enabled;
   25.88      hvm_funcs.guest_x86_mode = vmx_guest_x86_mode;
   25.89 -    hvm_funcs.instruction_length = vmx_instruction_length;
   25.90      hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg;
   25.91  
   25.92      hvm_funcs.update_host_cr3 = vmx_update_host_cr3;
   25.93 @@ -771,7 +776,7 @@ int start_vmx(void)
   25.94       */
   25.95      boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
   25.96  
   25.97 -    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
   25.98 +    if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
   25.99          return 0;
  25.100  
  25.101      rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
  25.102 @@ -791,24 +796,11 @@ int start_vmx(void)
  25.103                IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
  25.104      }
  25.105  
  25.106 -    if ( !check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
  25.107 -                             MSR_IA32_VMX_PINBASED_CTLS_MSR) )
  25.108 -        return 0;
  25.109 -    if ( !check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
  25.110 -                             MSR_IA32_VMX_PROCBASED_CTLS_MSR) )
  25.111 -        return 0;
  25.112 -    if ( !check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
  25.113 -                             MSR_IA32_VMX_EXIT_CTLS_MSR) )
  25.114 -        return 0;
  25.115 -    if ( !check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
  25.116 -                             MSR_IA32_VMX_ENTRY_CTLS_MSR) )
  25.117 -        return 0;
  25.118 -
  25.119      set_in_cr4(X86_CR4_VMXE);
  25.120  
  25.121      vmx_init_vmcs_config();
  25.122 -    
  25.123 -    if(!smp_processor_id())
  25.124 +
  25.125 +    if ( smp_processor_id() == 0 )
  25.126          setup_vmcs_dump();
  25.127  
  25.128      if ( (vmcs = vmx_alloc_host_vmcs()) == NULL )
  25.129 @@ -1499,7 +1491,7 @@ static int vmx_set_cr0(unsigned long val
  25.130                      &v->arch.hvm_vmx.cpu_state);
  25.131  
  25.132              __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
  25.133 -            vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
  25.134 +            vm_entry_value |= VM_ENTRY_IA32E_MODE;
  25.135              __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
  25.136          }
  25.137  #endif
  25.138 @@ -1553,7 +1545,7 @@ static int vmx_set_cr0(unsigned long val
  25.139                  clear_bit(VMX_CPU_STATE_LMA_ENABLED,
  25.140                            &v->arch.hvm_vmx.cpu_state);
  25.141                  __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
  25.142 -                vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
  25.143 +                vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
  25.144                  __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
  25.145              }
  25.146          }
  25.147 @@ -2276,15 +2268,8 @@ asmlinkage void vmx_vmexit_handler(struc
  25.148          domain_crash_synchronous();
  25.149          break;
  25.150      case EXIT_REASON_PENDING_INTERRUPT:
  25.151 -        /*
  25.152 -         * Not sure exactly what the purpose of this is.  The only bits set
  25.153 -         * and cleared at this point are CPU_BASED_VIRTUAL_INTR_PENDING.
  25.154 -         * (in io.c:{enable,disable}_irq_window().  So presumably we want to
  25.155 -         * set it to the original value...
  25.156 -         */
  25.157 +        /* Disable the interrupt window. */
  25.158          v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
  25.159 -        v->arch.hvm_vcpu.u.vmx.exec_control |=
  25.160 -            (MONITOR_CPU_BASED_EXEC_CONTROLS & CPU_BASED_VIRTUAL_INTR_PENDING);
  25.161          __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
  25.162                    v->arch.hvm_vcpu.u.vmx.exec_control);
  25.163          break;
    26.1 --- a/xen/arch/x86/irq.c	Tue Sep 26 16:15:45 2006 -0600
    26.2 +++ b/xen/arch/x86/irq.c	Tue Sep 26 19:11:33 2006 -0600
    26.3 @@ -351,11 +351,15 @@ int pirq_acktype(int irq)
    26.4  
    26.5      desc = &irq_desc[vector];
    26.6  
    26.7 +    if ( desc->handler == &no_irq_type )
    26.8 +        return ACKTYPE_NONE;
    26.9 +
   26.10      /*
   26.11 -     * Edge-triggered IO-APIC interrupts need no final acknowledgement:
   26.12 -     * we ACK early during interrupt processing.
   26.13 +     * Edge-triggered IO-APIC and LAPIC interrupts need no final
   26.14 +     * acknowledgement: we ACK early during interrupt processing.
   26.15       */
   26.16 -    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") )
   26.17 +    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
   26.18 +         !strcmp(desc->handler->typename, "local-APIC-edge") )
   26.19          return ACKTYPE_NONE;
   26.20  
   26.21      /*
   26.22 @@ -376,7 +380,9 @@ int pirq_acktype(int irq)
   26.23          return ACKTYPE_NONE; /* edge-triggered => no final EOI */
   26.24      }
   26.25  
   26.26 +    printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq);
   26.27      BUG();
   26.28 +
   26.29      return 0;
   26.30  }
   26.31  
    27.1 --- a/xen/arch/x86/setup.c	Tue Sep 26 16:15:45 2006 -0600
    27.2 +++ b/xen/arch/x86/setup.c	Tue Sep 26 19:11:33 2006 -0600
    27.3 @@ -272,6 +272,13 @@ void __init __start_xen(multiboot_info_t
    27.4          EARLY_FAIL();
    27.5      }
    27.6  
    27.7 +    /*
    27.8 +     * Since there are some stubs getting built on the stacks which use
    27.9 +     * direct calls/jumps, the heap must be confined to the lower 2G so
   27.10 +     * that those branches can reach their targets.
   27.11 +     */
   27.12 +    if ( opt_xenheap_megabytes > 2048 )
   27.13 +        opt_xenheap_megabytes = 2048;
   27.14      xenheap_phys_end = opt_xenheap_megabytes << 20;
   27.15  
   27.16      if ( mbi->flags & MBI_MEMMAP )
    28.1 --- a/xen/common/domain.c	Tue Sep 26 16:15:45 2006 -0600
    28.2 +++ b/xen/common/domain.c	Tue Sep 26 19:11:33 2006 -0600
    28.3 @@ -82,20 +82,16 @@ struct vcpu *alloc_vcpu(
    28.4  
    28.5      v->domain = d;
    28.6      v->vcpu_id = vcpu_id;
    28.7 -    v->processor = cpu_id;
    28.8      v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
    28.9      spin_lock_init(&v->pause_lock);
   28.10  
   28.11 -    v->cpu_affinity = is_idle_domain(d) ?
   28.12 -        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
   28.13 -
   28.14      v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
   28.15      v->runstate.state_entry_time = NOW();
   28.16  
   28.17      if ( (vcpu_id != 0) && !is_idle_domain(d) )
   28.18          set_bit(_VCPUF_down, &v->vcpu_flags);
   28.19  
   28.20 -    if ( sched_init_vcpu(v) < 0 )
   28.21 +    if ( sched_init_vcpu(v, cpu_id) < 0 )
   28.22      {
   28.23          free_vcpu_struct(v);
   28.24          return NULL;
    29.1 --- a/xen/common/gdbstub.c	Tue Sep 26 16:15:45 2006 -0600
    29.2 +++ b/xen/common/gdbstub.c	Tue Sep 26 19:11:33 2006 -0600
    29.3 @@ -53,6 +53,8 @@
    29.4  static char opt_gdb[30] = "none";
    29.5  string_param("gdb", opt_gdb);
    29.6  
    29.7 +static void gdbstub_console_puts(const char *str);
    29.8 +
    29.9  /* value <-> char (de)serialzers */
   29.10  char
   29.11  hex2char(unsigned long x)
   29.12 @@ -360,7 +362,6 @@ gdb_cmd_write_mem(unsigned long addr, un
   29.13  static void
   29.14  gdbstub_attach(struct gdb_context *ctx)
   29.15  {
   29.16 -    static void gdbstub_console_puts(const char *str);
   29.17      if ( ctx->currently_attached )
   29.18          return;    
   29.19      ctx->currently_attached = 1;
    30.1 --- a/xen/common/schedule.c	Tue Sep 26 16:15:45 2006 -0600
    30.2 +++ b/xen/common/schedule.c	Tue Sep 26 19:11:33 2006 -0600
    30.3 @@ -37,6 +37,10 @@ extern void arch_getdomaininfo_ctxt(stru
    30.4  static char opt_sched[10] = "credit";
    30.5  string_param("sched", opt_sched);
    30.6  
    30.7 +/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
    30.8 +static unsigned int opt_dom0_vcpus_pin;
    30.9 +boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
   30.10 +
   30.11  #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
   30.12  
   30.13  /* Various timer handlers. */
   30.14 @@ -97,13 +101,26 @@ void vcpu_runstate_get(struct vcpu *v, s
   30.15      }
   30.16  }
   30.17  
   30.18 -int sched_init_vcpu(struct vcpu *v) 
   30.19 +int sched_init_vcpu(struct vcpu *v, unsigned int processor) 
   30.20  {
   30.21 +    struct domain *d = v->domain;
   30.22 +
   30.23 +    /*
   30.24 +     * Initialize processor and affinity settings. The idler, and potentially
   30.25 +     * domain-0 VCPUs, are pinned onto their respective physical CPUs.
   30.26 +     */
   30.27 +    v->processor = processor;
   30.28 +    if ( is_idle_domain(d) || ((d->domain_id == 0) && opt_dom0_vcpus_pin) )
   30.29 +        v->cpu_affinity = cpumask_of_cpu(processor);
   30.30 +    else
   30.31 +        v->cpu_affinity = CPU_MASK_ALL;
   30.32 +
   30.33      /* Initialise the per-domain timers. */
   30.34      init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
   30.35      init_timer(&v->poll_timer, poll_timer_fn, v, v->processor);
   30.36  
   30.37 -    if ( is_idle_vcpu(v) )
   30.38 +    /* Idle VCPUs are scheduled immediately. */
   30.39 +    if ( is_idle_domain(d) )
   30.40      {
   30.41          per_cpu(schedule_data, v->processor).curr = v;
   30.42          per_cpu(schedule_data, v->processor).idle = v;
   30.43 @@ -212,6 +229,9 @@ int vcpu_set_affinity(struct vcpu *v, cp
   30.44      cpumask_t online_affinity;
   30.45      unsigned long flags;
   30.46  
   30.47 +    if ( (v->domain->domain_id == 0) && opt_dom0_vcpus_pin )
   30.48 +        return -EINVAL;
   30.49 +
   30.50      cpus_and(online_affinity, *affinity, cpu_online_map);
   30.51      if ( cpus_empty(online_affinity) )
   30.52          return -EINVAL;
    31.1 --- a/xen/include/asm-x86/hvm/hvm.h	Tue Sep 26 16:15:45 2006 -0600
    31.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Tue Sep 26 19:11:33 2006 -0600
    31.3 @@ -51,15 +51,13 @@ struct hvm_function_table {
    31.4       * Examine specifics of the guest state:
    31.5       * 1) determine whether the guest is in real or vm8086 mode,
    31.6       * 2) determine whether paging is enabled,
    31.7 -     * 3) return the length of the instruction that caused an exit.
    31.8 -     * 4) return the current guest control-register value
    31.9 +     * 3) return the current guest control-register value
   31.10       */
   31.11      int (*realmode)(struct vcpu *v);
   31.12      int (*paging_enabled)(struct vcpu *v);
   31.13      int (*long_mode_enabled)(struct vcpu *v);
   31.14      int (*pae_enabled)(struct vcpu *v);
   31.15      int (*guest_x86_mode)(struct vcpu *v);
   31.16 -    int (*instruction_length)(struct vcpu *v);
   31.17      unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
   31.18  
   31.19      /* 
   31.20 @@ -159,11 +157,7 @@ hvm_guest_x86_mode(struct vcpu *v)
   31.21      return hvm_funcs.guest_x86_mode(v);
   31.22  }
   31.23  
   31.24 -static inline int
   31.25 -hvm_instruction_length(struct vcpu *v)
   31.26 -{
   31.27 -    return hvm_funcs.instruction_length(v);
   31.28 -}
   31.29 +int hvm_instruction_length(struct cpu_user_regs *regs, int mode);
   31.30  
   31.31  static inline void
   31.32  hvm_update_host_cr3(struct vcpu *v)
   31.33 @@ -182,9 +176,9 @@ hvm_get_guest_ctrl_reg(struct vcpu *v, u
   31.34      return 0;                   /* force to fail */
   31.35  }
   31.36  
   31.37 -extern void hvm_stts(struct vcpu *v);
   31.38 -extern void hvm_set_guest_time(struct vcpu *v, u64 gtime);
   31.39 -extern void hvm_do_resume(struct vcpu *v);
   31.40 +void hvm_stts(struct vcpu *v);
   31.41 +void hvm_set_guest_time(struct vcpu *v, u64 gtime);
   31.42 +void hvm_do_resume(struct vcpu *v);
   31.43  
   31.44  static inline void
   31.45  hvm_init_ap_context(struct vcpu_guest_context *ctxt,
   31.46 @@ -193,6 +187,6 @@ hvm_init_ap_context(struct vcpu_guest_co
   31.47      return hvm_funcs.init_ap_context(ctxt, vcpuid, trampoline_vector);
   31.48  }
   31.49  
   31.50 -extern int hvm_bringup_ap(int vcpuid, int trampoline_vector);
   31.51 +int hvm_bringup_ap(int vcpuid, int trampoline_vector);
   31.52  
   31.53  #endif /* __ASM_X86_HVM_HVM_H__ */
    32.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Sep 26 16:15:45 2006 -0600
    32.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Sep 26 19:11:33 2006 -0600
    32.3 @@ -132,12 +132,16 @@ extern int vmcs_version;
    32.4  #define CPU_BASED_ACTIVATE_IO_BITMAP    0x02000000
    32.5  #define CPU_BASED_MONITOR_EXITING       0x20000000
    32.6  #define CPU_BASED_PAUSE_EXITING         0x40000000
    32.7 -#define PIN_BASED_EXT_INTR_MASK 0x1
    32.8 -#define PIN_BASED_NMI_EXITING   0x8
    32.9  
   32.10 +#define PIN_BASED_EXT_INTR_MASK         0x00000001
   32.11 +#define PIN_BASED_NMI_EXITING           0x00000008
   32.12 +
   32.13 +#define VM_EXIT_IA32E_MODE              0x00000200
   32.14  #define VM_EXIT_ACK_INTR_ON_EXIT        0x00008000
   32.15 -#define VM_EXIT_HOST_ADD_SPACE_SIZE     0x00000200
   32.16  
   32.17 +#define VM_ENTRY_IA32E_MODE             0x00000200
   32.18 +#define VM_ENTRY_SMM                    0x00000400
   32.19 +#define VM_ENTRY_DEACT_DUAL_MONITOR     0x00000800
   32.20  
   32.21  /* VMCS Encordings */
   32.22  enum vmcs_field {
   32.23 @@ -217,6 +221,7 @@ enum vmcs_field {
   32.24      GUEST_LDTR_AR_BYTES             = 0x00004820,
   32.25      GUEST_TR_AR_BYTES               = 0x00004822,
   32.26      GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
   32.27 +    GUEST_ACTIVITY_STATE            = 0x00004826,
   32.28      GUEST_SYSENTER_CS               = 0x0000482A,
   32.29      HOST_IA32_SYSENTER_CS           = 0x00004c00,
   32.30      CR0_GUEST_HOST_MASK             = 0x00006000,
    33.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Tue Sep 26 16:15:45 2006 -0600
    33.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Tue Sep 26 19:11:33 2006 -0600
    33.3 @@ -37,73 +37,6 @@ extern void set_guest_time(struct vcpu *
    33.4  extern unsigned int cpu_rev;
    33.5  
    33.6  /*
    33.7 - * Need fill bits for SENTER
    33.8 - */
    33.9 -
   33.10 -#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE  0x00000016
   33.11 -
   33.12 -#define MONITOR_PIN_BASED_EXEC_CONTROLS                 \
   33.13 -    (                                                   \
   33.14 -    MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
   33.15 -    PIN_BASED_EXT_INTR_MASK |                           \
   33.16 -    PIN_BASED_NMI_EXITING                               \
   33.17 -    )
   33.18 -
   33.19 -#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE  0x0401e172
   33.20 -
   33.21 -#define _MONITOR_CPU_BASED_EXEC_CONTROLS                \
   33.22 -    (                                                   \
   33.23 -    MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
   33.24 -    CPU_BASED_HLT_EXITING |                             \
   33.25 -    CPU_BASED_INVDPG_EXITING |                          \
   33.26 -    CPU_BASED_MWAIT_EXITING |                           \
   33.27 -    CPU_BASED_MOV_DR_EXITING |                          \
   33.28 -    CPU_BASED_ACTIVATE_IO_BITMAP |                      \
   33.29 -    CPU_BASED_USE_TSC_OFFSETING                         \
   33.30 -    )
   33.31 -
   33.32 -#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE      \
   33.33 -    (                                                   \
   33.34 -    CPU_BASED_CR8_LOAD_EXITING |                        \
   33.35 -    CPU_BASED_CR8_STORE_EXITING                         \
   33.36 -    )
   33.37 -
   33.38 -#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE         0x0003edff
   33.39 -
   33.40 -#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE             0x00000200
   33.41 -
   33.42 -#define _MONITOR_VM_EXIT_CONTROLS                       \
   33.43 -    (                                                   \
   33.44 -    MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |           \
   33.45 -    VM_EXIT_ACK_INTR_ON_EXIT                            \
   33.46 -    )
   33.47 -
   33.48 -#if defined (__x86_64__)
   33.49 -#define MONITOR_CPU_BASED_EXEC_CONTROLS                 \
   33.50 -    (                                                   \
   33.51 -    _MONITOR_CPU_BASED_EXEC_CONTROLS |                  \
   33.52 -    MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE          \
   33.53 -    )
   33.54 -#define MONITOR_VM_EXIT_CONTROLS                        \
   33.55 -    (                                                   \
   33.56 -    _MONITOR_VM_EXIT_CONTROLS |                         \
   33.57 -    MONITOR_VM_EXIT_CONTROLS_IA32E_MODE                 \
   33.58 -    )
   33.59 -#else
   33.60 -#define MONITOR_CPU_BASED_EXEC_CONTROLS                 \
   33.61 -    _MONITOR_CPU_BASED_EXEC_CONTROLS
   33.62 -
   33.63 -#define MONITOR_VM_EXIT_CONTROLS                        \
   33.64 -    _MONITOR_VM_EXIT_CONTROLS
   33.65 -#endif
   33.66 -
   33.67 -#define VM_ENTRY_CONTROLS_RESERVED_VALUE                0x000011ff
   33.68 -#define VM_ENTRY_CONTROLS_IA32E_MODE                    0x00000200
   33.69 -
   33.70 -#define MONITOR_VM_ENTRY_CONTROLS                       \
   33.71 -    VM_ENTRY_CONTROLS_RESERVED_VALUE
   33.72 -
   33.73 -/*
   33.74   * Exit Reasons
   33.75   */
   33.76  #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
   33.77 @@ -425,38 +358,12 @@ static inline int vmx_pae_enabled(struct
   33.78  }
   33.79  
   33.80  /* Works only for vcpu == current */
   33.81 -static inline int vmx_realmode(struct vcpu *v)
   33.82 -{
   33.83 -    unsigned long rflags;
   33.84 -    ASSERT(v == current);
   33.85 -
   33.86 -    __vmread(GUEST_RFLAGS, &rflags);
   33.87 -    return rflags & X86_EFLAGS_VM;
   33.88 -}
   33.89 -
   33.90 -/* Works only for vcpu == current */
   33.91  static inline void vmx_update_host_cr3(struct vcpu *v)
   33.92  {
   33.93      ASSERT(v == current);
   33.94      __vmwrite(HOST_CR3, v->arch.cr3);
   33.95  }
   33.96  
   33.97 -static inline int vmx_guest_x86_mode(struct vcpu *v)
   33.98 -{
   33.99 -    unsigned long cs_ar_bytes;
  33.100 -    ASSERT(v == current);
  33.101 -
  33.102 -    if ( vmx_long_mode_enabled(v) )
  33.103 -    {
  33.104 -        __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes);
  33.105 -        return (cs_ar_bytes & (1u<<13)) ? 8 : 4;
  33.106 -    }
  33.107 -    if ( vmx_realmode(v) )
  33.108 -        return 2;
  33.109 -    __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes);
  33.110 -    return (cs_ar_bytes & (1u<<14)) ? 4 : 2;
  33.111 -}
  33.112 -
  33.113  static inline int vmx_pgbit_test(struct vcpu *v)
  33.114  {
  33.115      unsigned long cr0;
    34.1 --- a/xen/include/xen/compiler.h	Tue Sep 26 16:15:45 2006 -0600
    34.2 +++ b/xen/include/xen/compiler.h	Tue Sep 26 19:11:33 2006 -0600
    34.3 @@ -35,7 +35,7 @@
    34.4  #define offsetof(a,b) ((unsigned long)&(((a *)0)->b))
    34.5  #endif
    34.6  
    34.7 -#if defined(__x86_64__) && (__GNUC__ > 3)
    34.8 +#ifdef GCC_HAS_VISIBILITY_ATTRIBUTE
    34.9  /* Results in more efficient PIC code (no indirections through GOT or PLT). */
   34.10  #pragma GCC visibility push(hidden)
   34.11  #endif
    35.1 --- a/xen/include/xen/sched.h	Tue Sep 26 16:15:45 2006 -0600
    35.2 +++ b/xen/include/xen/sched.h	Tue Sep 26 19:11:33 2006 -0600
    35.3 @@ -280,7 +280,7 @@ void new_thread(struct vcpu *d,
    35.4  #define set_current_state(_s) do { current->state = (_s); } while (0)
    35.5  void scheduler_init(void);
    35.6  void schedulers_start(void);
    35.7 -int  sched_init_vcpu(struct vcpu *);
    35.8 +int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
    35.9  void sched_destroy_domain(struct domain *);
   35.10  long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
   35.11  int  sched_id(void);