ia64/xen-unstable

changeset 15406:810885428743

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Wed Jun 20 12:49:27 2007 -0600 (2007-06-20)
parents c20bc60f9243 005dd6b1cf8e
children 593fe08cfca2
     1.1 --- a/docs/src/user.tex	Wed Jun 20 12:47:52 2007 -0600
     1.2 +++ b/docs/src/user.tex	Wed Jun 20 12:49:27 2007 -0600
     1.3 @@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
     1.4    \begin{description}
     1.5    \item[ ask ] Display a vga menu allowing manual selection of video
     1.6    mode.
     1.7 +  \item[ current ] Use existing vga mode without modification.
     1.8    \item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
     1.9    one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
    1.10    \item[ gfx-$<$mode$>$ ] Select VESA graphics mode
     2.1 --- a/tools/blktap/drivers/Makefile	Wed Jun 20 12:47:52 2007 -0600
     2.2 +++ b/tools/blktap/drivers/Makefile	Wed Jun 20 12:49:27 2007 -0600
     2.3 @@ -35,6 +35,7 @@ BLK-OBJS  += block-vmdk.o
     2.4  BLK-OBJS  += block-ram.o
     2.5  BLK-OBJS  += block-qcow.o
     2.6  BLK-OBJS  += aes.o
     2.7 +BLK-OBJS  += tapaio.o
     2.8  
     2.9  all: $(IBIN) qcow-util
    2.10  
     3.1 --- a/tools/blktap/drivers/block-aio.c	Wed Jun 20 12:47:52 2007 -0600
     3.2 +++ b/tools/blktap/drivers/block-aio.c	Wed Jun 20 12:49:27 2007 -0600
     3.3 @@ -43,14 +43,7 @@
     3.4  #include <sys/ioctl.h>
     3.5  #include <linux/fs.h>
     3.6  #include "tapdisk.h"
     3.7 -
     3.8 -
     3.9 -/**
    3.10 - * We used a kernel patch to return an fd associated with the AIO context
    3.11 - * so that we can concurrently poll on synchronous and async descriptors.
    3.12 - * This is signalled by passing 1 as the io context to io_setup.
    3.13 - */
    3.14 -#define REQUEST_ASYNC_FD 1
    3.15 +#include "tapaio.h"
    3.16  
    3.17  #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
    3.18  
    3.19 @@ -65,14 +58,13 @@ struct tdaio_state {
    3.20  	int fd;
    3.21  	
    3.22  	/* libaio state */
    3.23 -	io_context_t       aio_ctx;
    3.24 +	tap_aio_context_t  aio_ctx;
    3.25  	struct iocb        iocb_list  [MAX_AIO_REQS];
    3.26  	struct iocb       *iocb_free  [MAX_AIO_REQS];
    3.27  	struct pending_aio pending_aio[MAX_AIO_REQS];
    3.28  	int                iocb_free_count;
    3.29  	struct iocb       *iocb_queue[MAX_AIO_REQS];
    3.30  	int                iocb_queued;
    3.31 -	int                poll_fd; /* NB: we require aio_poll support */
    3.32  	struct io_event    aio_events[MAX_AIO_REQS];
    3.33  };
    3.34  
    3.35 @@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
    3.36  	for(i = 0; i < MAX_IOFD; i++) 
    3.37  		dd->io_fd[i] = 0;
    3.38  
    3.39 -	dd->io_fd[0] = prv->poll_fd;
    3.40 +	dd->io_fd[0] = prv->aio_ctx.pollfd;
    3.41  }
    3.42  
    3.43  /* Open the disk file and initialize aio state. */
    3.44 @@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd, 
    3.45  	/* Initialize AIO */
    3.46  	prv->iocb_free_count = MAX_AIO_REQS;
    3.47  	prv->iocb_queued     = 0;
    3.48 -	
    3.49 -	prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
    3.50 -	prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
    3.51  
    3.52 -	if (prv->poll_fd < 0) {
    3.53 -		ret = prv->poll_fd;
    3.54 +	ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
    3.55 +	if (ret < 0) {
    3.56                  if (ret == -EAGAIN) {
    3.57                          DPRINTF("Couldn't setup AIO context.  If you are "
    3.58                                  "trying to concurrently use a large number "
    3.59 @@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd, 
     3.60                                 "(e.g. 'echo 1048576 > /proc/sys/fs/"
    3.61                                  "aio-max-nr')\n");
    3.62                  } else {
    3.63 -                        DPRINTF("Couldn't get fd for AIO poll support.  This "
    3.64 -                                "is probably because your kernel does not "
    3.65 -                                "have the aio-poll patch applied.\n");
    3.66 +                        DPRINTF("Couldn't setup AIO context.\n");
    3.67                  }
    3.68  		goto done;
    3.69  	}
    3.70 @@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
    3.71  	if (!prv->iocb_queued)
    3.72  		return 0;
    3.73  
    3.74 -	ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
    3.75 +	ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
    3.76  	
    3.77  	/* XXX: TODO: Handle error conditions here. */
    3.78  	
    3.79 @@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
    3.80  {
    3.81  	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
    3.82  	
    3.83 -	io_destroy(prv->aio_ctx);
    3.84 +	io_destroy(prv->aio_ctx.aio_ctx);
    3.85  	close(prv->fd);
    3.86  
    3.87  	return 0;
    3.88 @@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)
    3.89  
    3.90  int tdaio_do_callbacks(struct disk_driver *dd, int sid)
    3.91  {
    3.92 -	int ret, i, rsp = 0;
    3.93 +	int i, nr_events, rsp = 0;
    3.94  	struct io_event *ep;
    3.95  	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
    3.96  
    3.97 -	/* Non-blocking test for completed io. */
    3.98 -	ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
    3.99 -			   NULL);
   3.100 -			
   3.101 -	for (ep=prv->aio_events,i=ret; i-->0; ep++) {
   3.102 +	nr_events = tap_aio_get_events(&prv->aio_ctx);
   3.103 +repeat:
   3.104 +	for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
   3.105  		struct iocb        *io  = ep->obj;
   3.106  		struct pending_aio *pio;
   3.107  		
   3.108 @@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive
   3.109  
   3.110  		prv->iocb_free[prv->iocb_free_count++] = io;
   3.111  	}
   3.112 +
   3.113 +	if (nr_events) {
   3.114 +		nr_events = tap_aio_more_events(&prv->aio_ctx);
   3.115 +		goto repeat;
   3.116 +	}
   3.117 +
   3.118 +	tap_aio_continue(&prv->aio_ctx);
   3.119 +
   3.120  	return rsp;
   3.121  }
   3.122  
     4.1 --- a/tools/blktap/drivers/block-qcow.c	Wed Jun 20 12:47:52 2007 -0600
     4.2 +++ b/tools/blktap/drivers/block-qcow.c	Wed Jun 20 12:49:27 2007 -0600
     4.3 @@ -38,6 +38,7 @@
     4.4  #include "bswap.h"
     4.5  #include "aes.h"
     4.6  #include "tapdisk.h"
     4.7 +#include "tapaio.h"
     4.8  
     4.9  #if 1
    4.10  #define ASSERT(_p) \
    4.11 @@ -53,9 +54,6 @@
    4.12          (l + (s - 1)) - ((l + (s - 1)) % s)); \
    4.13  })
    4.14  
    4.15 -/******AIO DEFINES******/
    4.16 -#define REQUEST_ASYNC_FD 1
    4.17 -
    4.18  struct pending_aio {
    4.19          td_callback_t cb;
    4.20          int id;
    4.21 @@ -145,7 +143,7 @@ struct tdqcow_state {
    4.22  	AES_KEY aes_encrypt_key;       /*AES key*/
    4.23  	AES_KEY aes_decrypt_key;       /*AES key*/
    4.24          /* libaio state */
    4.25 -        io_context_t        aio_ctx;
    4.26 +        tap_aio_context_t   aio_ctx;
    4.27          int                 max_aio_reqs;
    4.28          struct iocb        *iocb_list;
    4.29          struct iocb       **iocb_free;
    4.30 @@ -153,7 +151,6 @@ struct tdqcow_state {
    4.31          int                 iocb_free_count;
    4.32          struct iocb       **iocb_queue;
    4.33          int                 iocb_queued;
    4.34 -        int                 poll_fd;      /* NB: we require aio_poll support */
    4.35          struct io_event    *aio_events;
    4.36  };
    4.37  
    4.38 @@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d
    4.39  
    4.40  static int init_aio_state(struct disk_driver *dd)
    4.41  {
    4.42 -        int i;
    4.43 +	int i, ret;
    4.44  	struct td_state     *bs = dd->td_state;
    4.45  	struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
    4.46          long     ioidx;
    4.47 @@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
    4.48                  goto fail;
    4.49          }
    4.50  
    4.51 -        /*Signal kernel to create Poll FD for Asyc completion events*/
    4.52 -        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
    4.53 -        s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
    4.54 -
    4.55 -	if (s->poll_fd < 0) {
    4.56 -                if (s->poll_fd == -EAGAIN) {
    4.57 +	ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
    4.58 +	if (ret < 0) {
    4.59 +                if (ret == -EAGAIN) {
    4.60                          DPRINTF("Couldn't setup AIO context.  If you are "
    4.61                                  "trying to concurrently use a large number "
    4.62                                  "of blktap-based disks, you may need to "
    4.63 @@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
     4.64                                 "(e.g. 'echo 1048576 > /proc/sys/fs/"
    4.65                                  "aio-max-nr')\n");
    4.66                  } else {
    4.67 -                        DPRINTF("Couldn't get fd for AIO poll support.  This "
    4.68 -                                "is probably because your kernel does not "
    4.69 -                                "have the aio-poll patch applied.\n");
    4.70 +                        DPRINTF("Couldn't setup AIO context.\n");
    4.71                  }
    4.72  		goto fail;
    4.73  	}
    4.74 @@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
    4.75  	for(i = 0; i < MAX_IOFD; i++) 
    4.76  		dd->io_fd[i] = 0;
    4.77  
    4.78 -	dd->io_fd[0] = s->poll_fd;
    4.79 +	dd->io_fd[0] = s->aio_ctx.pollfd;
    4.80  }
    4.81  
    4.82  /* Open the disk file and initialize qcow state. */
    4.83 @@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
    4.84  	if (!prv->iocb_queued)
    4.85  		return 0;
    4.86  
    4.87 -	ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
    4.88 +	ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
    4.89  
    4.90          /* XXX: TODO: Handle error conditions here. */
    4.91  
    4.92 @@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
    4.93  		close(fd);
    4.94  	}
    4.95  
    4.96 -	io_destroy(s->aio_ctx);
    4.97 +	io_destroy(s->aio_ctx.aio_ctx);
    4.98  	free(s->name);
    4.99  	free(s->l1_table);
   4.100  	free(s->l2_cache);
   4.101 @@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
   4.102  
   4.103  int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
   4.104  {
   4.105 -        int ret, i, rsp = 0,*ptr;
   4.106 +        int ret, i, nr_events, rsp = 0,*ptr;
   4.107          struct io_event *ep;
   4.108          struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
   4.109  
   4.110          if (sid > MAX_IOFD) return 1;
   4.111 -	
   4.112 -	/* Non-blocking test for completed io. */
   4.113 -        ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
   4.114 -                           NULL);
   4.115  
   4.116 -        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
   4.117 +        nr_events = tap_aio_get_events(&prv->aio_ctx);
   4.118 +repeat:
   4.119 +        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
   4.120                  struct iocb        *io  = ep->obj;
   4.121                  struct pending_aio *pio;
   4.122  
   4.123 @@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
   4.124  
   4.125                  prv->iocb_free[prv->iocb_free_count++] = io;
   4.126          }
   4.127 +
   4.128 +        if (nr_events) {
   4.129 +                nr_events = tap_aio_more_events(&prv->aio_ctx);
   4.130 +                goto repeat;
   4.131 +        }
   4.132 +
   4.133 +        tap_aio_continue(&prv->aio_ctx);
   4.134 +
   4.135          return rsp;
   4.136  }
   4.137  
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/blktap/drivers/tapaio.c	Wed Jun 20 12:49:27 2007 -0600
     5.3 @@ -0,0 +1,164 @@
     5.4 +/*
     5.5 + * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
     5.6 + * Copyright (c) 2007 Red Hat, Inc.
     5.7 + *
     5.8 + * This program is free software; you can redistribute it and/or
     5.9 + * modify it under the terms of the GNU General Public License version 2
    5.10 + * as published by the Free Software Foundation; or, when distributed
    5.11 + * separately from the Linux kernel or incorporated into other
    5.12 + * software packages, subject to the following license:
    5.13 + *
    5.14 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    5.15 + * of this source file (the "Software"), to deal in the Software without
    5.16 + * restriction, including without limitation the rights to use, copy, modify,
    5.17 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    5.18 + * and to permit persons to whom the Software is furnished to do so, subject to
    5.19 + * the following conditions:
    5.20 + *
    5.21 + * The above copyright notice and this permission notice shall be included in
    5.22 + * all copies or substantial portions of the Software.
    5.23 + *
    5.24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    5.25 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    5.26 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    5.27 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    5.28 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    5.29 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    5.30 + * IN THE SOFTWARE.
    5.31 + */
    5.32 +
    5.33 +#include "tapaio.h"
    5.34 +#include "tapdisk.h"
    5.35 +#include <unistd.h>
    5.36 +
    5.37 +/**
    5.38 + * We used a kernel patch to return an fd associated with the AIO context
    5.39 + * so that we can concurrently poll on synchronous and async descriptors.
    5.40 + * This is signalled by passing 1 as the io context to io_setup.
    5.41 + */
    5.42 +#define REQUEST_ASYNC_FD 1
    5.43 +
    5.44 +/*
    5.45 + * If we don't have any way to do epoll on aio events in a normal kernel,
    5.46 + * wait for aio events in a separate thread and return completion status
     5.47 + * via a pipe that can be waited on normally.
    5.48 + *
    5.49 + * To keep locking problems between the completion thread and the submit
    5.50 + * thread to a minimum, there's a handshake which allows only one thread
    5.51 + * to be doing work on the completion queue at a time:
    5.52 + *
    5.53 + * 1) main thread sends completion thread a command via the command pipe;
    5.54 + * 2) completion thread waits for aio events and returns the number
    5.55 + *    received on the completion pipe
    5.56 + * 3) main thread processes the received ctx->aio_events events
    5.57 + * 4) loop back to 1) to let the completion thread refill the aio_events
    5.58 + *    buffer.
    5.59 + *
    5.60 + * This workaround needs to disappear once the kernel provides a single
    5.61 + * mechanism for waiting on both aio and normal fd wakeups.
    5.62 + */
    5.63 +static void *
    5.64 +tap_aio_completion_thread(void *arg)
    5.65 +{
    5.66 +	tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
    5.67 +	int command;
    5.68 +	int nr_events;
    5.69 +	int rc;
    5.70 +
    5.71 +	while (1) {
    5.72 +		rc = read(ctx->command_fd[0], &command, sizeof(command));
    5.73 +
    5.74 +		do {
    5.75 +			rc = io_getevents(ctx->aio_ctx, 1,
    5.76 +					  ctx->max_aio_events, ctx->aio_events,
    5.77 +					  NULL);
    5.78 +			if (rc) {
    5.79 +				nr_events = rc;
    5.80 +				rc = write(ctx->completion_fd[1], &nr_events,
    5.81 +					   sizeof(nr_events));
    5.82 +			}
    5.83 +		} while (!rc);
    5.84 +	}
    5.85 +}
    5.86 +
    5.87 +void
    5.88 +tap_aio_continue(tap_aio_context_t *ctx)
    5.89 +{
    5.90 +        int cmd = 0;
    5.91 +
    5.92 +        if (!ctx->poll_in_thread)
    5.93 +                return;
    5.94 +
    5.95 +        if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
    5.96 +                DPRINTF("Cannot write to command pipe\n");
    5.97 +}
    5.98 +
    5.99 +int
   5.100 +tap_aio_setup(tap_aio_context_t *ctx,
   5.101 +              struct io_event *aio_events,
   5.102 +              int max_aio_events)
   5.103 +{
   5.104 +        int ret;
   5.105 +
   5.106 +        ctx->aio_events = aio_events;
   5.107 +        ctx->max_aio_events = max_aio_events;
   5.108 +        ctx->poll_in_thread = 0;
   5.109 +
   5.110 +        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
   5.111 +        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
   5.112 +        if (ret < 0 && ret != -EINVAL)
   5.113 +                return ret;
   5.114 +        else if (ret > 0) {
   5.115 +                ctx->pollfd = ret;
   5.116 +                return ctx->pollfd;
   5.117 +        }
   5.118 +
   5.119 +        ctx->aio_ctx = (io_context_t) 0;
   5.120 +        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
   5.121 +        if (ret < 0)
   5.122 +                return ret;
   5.123 +
   5.124 +        if ((ret = pipe(ctx->command_fd)) < 0) {
   5.125 +                DPRINTF("Unable to create command pipe\n");
   5.126 +                return -1;
   5.127 +        }
   5.128 +        if ((ret = pipe(ctx->completion_fd)) < 0) {
   5.129 +                DPRINTF("Unable to create completion pipe\n");
   5.130 +                return -1;
   5.131 +        }
   5.132 +
   5.133 +        if ((ret = pthread_create(&ctx->aio_thread, NULL,
   5.134 +                                  tap_aio_completion_thread, ctx)) != 0) {
   5.135 +                DPRINTF("Unable to create completion thread\n");
   5.136 +                return -1;
   5.137 +        }
   5.138 +
   5.139 +        ctx->pollfd = ctx->completion_fd[0];
   5.140 +        ctx->poll_in_thread = 1;
   5.141 +
   5.142 +        tap_aio_continue(ctx);
   5.143 +
   5.144 +        return 0;
   5.145 +}
   5.146 +
   5.147 +int
   5.148 +tap_aio_get_events(tap_aio_context_t *ctx)
   5.149 +{
   5.150 +        int nr_events = 0;
   5.151 +
   5.152 +        if (!ctx->poll_in_thread)
   5.153 +                nr_events = io_getevents(ctx->aio_ctx, 1,
   5.154 +                                         ctx->max_aio_events, ctx->aio_events, NULL);
   5.155 +        else
   5.156 +                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
   5.157 +
   5.158 +        return nr_events;
   5.159 +}
   5.160 +
   5.161 +int tap_aio_more_events(tap_aio_context_t *ctx)
   5.162 +{
   5.163 +        return io_getevents(ctx->aio_ctx, 0,
   5.164 +                            ctx->max_aio_events, ctx->aio_events, NULL);
   5.165 +}
   5.166 +
   5.167 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/blktap/drivers/tapaio.h	Wed Jun 20 12:49:27 2007 -0600
     6.3 @@ -0,0 +1,58 @@
     6.4 +/*
     6.5 + * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
     6.6 + * Copyright (c) 2007 Red Hat, Inc.
     6.7 + *
     6.8 + * This program is free software; you can redistribute it and/or
     6.9 + * modify it under the terms of the GNU General Public License version 2
    6.10 + * as published by the Free Software Foundation; or, when distributed
    6.11 + * separately from the Linux kernel or incorporated into other
    6.12 + * software packages, subject to the following license:
    6.13 + *
    6.14 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    6.15 + * of this source file (the "Software"), to deal in the Software without
    6.16 + * restriction, including without limitation the rights to use, copy, modify,
    6.17 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    6.18 + * and to permit persons to whom the Software is furnished to do so, subject to
    6.19 + * the following conditions:
    6.20 + *
    6.21 + * The above copyright notice and this permission notice shall be included in
    6.22 + * all copies or substantial portions of the Software.
    6.23 + *
    6.24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    6.25 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    6.26 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    6.27 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    6.28 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    6.29 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    6.30 + * IN THE SOFTWARE.
    6.31 + */
    6.32 +
    6.33 +#ifndef __TAPAIO_H__
    6.34 +#define __TAPAIO_H__
    6.35 +
    6.36 +#include <pthread.h>
    6.37 +#include <libaio.h>
    6.38 +
    6.39 +struct tap_aio_context {
    6.40 +        io_context_t     aio_ctx;
    6.41 +
    6.42 +        struct io_event *aio_events;
    6.43 +        int              max_aio_events;
    6.44 +
    6.45 +        pthread_t        aio_thread;
    6.46 +        int              command_fd[2];
    6.47 +        int              completion_fd[2];
    6.48 +        int              pollfd;
    6.49 +        unsigned int     poll_in_thread : 1;
    6.50 +};
    6.51 +
    6.52 +typedef struct tap_aio_context tap_aio_context_t;
    6.53 +
    6.54 +int  tap_aio_setup      (tap_aio_context_t *ctx,
    6.55 +                         struct io_event *aio_events,
    6.56 +                         int max_aio_events);
    6.57 +void tap_aio_continue   (tap_aio_context_t *ctx);
    6.58 +int  tap_aio_get_events (tap_aio_context_t *ctx);
    6.59 +int  tap_aio_more_events(tap_aio_context_t *ctx);
    6.60 +
    6.61 +#endif /* __TAPAIO_H__ */
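
As an illustration of the new interface (a sketch, not part of the changeset; driver_init, driver_do_callbacks, process and MAX_REQS are hypothetical names): a driver hands tap_aio_setup() an event buffer, waits on ctx.pollfd, and on wakeup drains events in batches before handing the buffer back to the completion thread, exactly the pattern block-aio.c and block-qcow.c follow above.

    #include "tapaio.h"

    #define MAX_REQS 64

    static tap_aio_context_t ctx;
    static struct io_event   events[MAX_REQS];

    /* Driver-specific completion handling (stubbed for the sketch). */
    static void process(struct io_event *ep) { (void)ep; }

    int driver_init(void)
    {
            /* On the thread-based fallback path this also starts the
             * completion thread and primes it with tap_aio_continue(). */
            return tap_aio_setup(&ctx, events, MAX_REQS);
    }

    /* Called when ctx.pollfd becomes readable. */
    void driver_do_callbacks(void)
    {
            int i, nr_events = tap_aio_get_events(&ctx);
    repeat:
            for (i = 0; i < nr_events; i++)
                    process(&events[i]);
            if (nr_events) {
                    /* Drain any batches that arrived meanwhile. */
                    nr_events = tap_aio_more_events(&ctx);
                    goto repeat;
            }
            tap_aio_continue(&ctx);   /* let the thread refill the buffer */
    }
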
     7.1 --- a/tools/examples/init.d/xendomains	Wed Jun 20 12:47:52 2007 -0600
     7.2 +++ b/tools/examples/init.d/xendomains	Wed Jun 20 12:49:27 2007 -0600
     7.3 @@ -182,25 +182,31 @@ rdnames()
     7.4  
     7.5  parseln()
     7.6  {
     7.7 -    name=`echo "$1" | cut -c0-17`
     7.8 -    name=${name%% *}
     7.9 -    rest=`echo "$1" | cut -c18- `
    7.10 -    read id mem cpu vcpu state tm < <(echo "$rest")
    7.11 +    if [[ "$1" =~ "\(domain" ]]; then
    7.12 +        name=;id=
    7.13 +    else if [[ "$1" =~ "\(name" ]]; then
    7.14 +        name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
    7.15 +    else if [[ "$1" =~ "\(domid" ]]; then
    7.16 +        id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
    7.17 +    fi; fi; fi
    7.18 +
    7.19 +    [ -n "$name" -a -n "$id" ] && return 0 || return 1
    7.20  }
    7.21  
    7.22  is_running()
    7.23  {
    7.24      rdname $1
    7.25      RC=1
    7.26 +    name=;id=
    7.27      while read LN; do
    7.28 -	parseln "$LN"
    7.29 +	parseln "$LN" || continue
    7.30  	if test $id = 0; then continue; fi
    7.31  	case $name in 
    7.32  	    ($NM)
    7.33  		RC=0
    7.34  		;;
    7.35  	esac
    7.36 -    done < <(xm list | grep -v '^Name')
    7.37 +    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
    7.38      return $RC
    7.39  }
    7.40  
    7.41 @@ -267,13 +273,14 @@ start()
    7.42  
    7.43  all_zombies()
    7.44  {
    7.45 +    name=;id=
    7.46      while read LN; do
    7.47 -	parseln "$LN"
    7.48 +	parseln "$LN" || continue
    7.49  	if test $id = 0; then continue; fi
    7.50  	if test "$state" != "-b---d" -a "$state" != "-----d"; then
    7.51  	    return 1;
    7.52  	fi
    7.53 -    done < <(xm list | grep -v '^Name')
    7.54 +    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
    7.55      return 0
    7.56  }
    7.57  
    7.58 @@ -309,8 +316,9 @@ stop()
    7.59  	rdnames
    7.60      fi
    7.61      echo -n "Shutting down Xen domains:"
    7.62 +    name=;id=
    7.63      while read LN; do
    7.64 -	parseln "$LN"
    7.65 +	parseln "$LN" || continue
    7.66  	if test $id = 0; then continue; fi
    7.67  	echo -n " $name"
    7.68  	if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
    7.69 @@ -384,7 +392,7 @@ stop()
    7.70  	    fi
    7.71  	    kill $WDOG_PID >/dev/null 2>&1
    7.72  	fi
    7.73 -    done < <(xm list | grep -v '^Name')
    7.74 +    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
    7.75  
    7.76      # NB. this shuts down ALL Xen domains (politely), not just the ones in
    7.77      # AUTODIR/*
    7.78 @@ -409,15 +417,16 @@ stop()
    7.79  
    7.80  check_domain_up()
    7.81  {
    7.82 +    name=;id=
    7.83      while read LN; do
    7.84 -	parseln "$LN"
    7.85 +	parseln "$LN" || continue
    7.86  	if test $id = 0; then continue; fi
    7.87  	case $name in 
    7.88  	    ($1)
    7.89  		return 0
    7.90  		;;
    7.91  	esac
    7.92 -    done < <(xm list | grep -v "^Name")
    7.93 +    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
    7.94      return 1
    7.95  }
    7.96  
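
For reference (the field values here are made up): parseln() now consumes the S-expression output of 'xm list -l' rather than the fixed-width table printed by plain 'xm list', so after the grep each loop sees lines of the form

    (domain
        (domid 3)
        (name example-guest)

and the sed expressions extract "3" and "example-guest". parseln() returns success only once both name and id have been collected for the current record, which is why each caller resets 'name=;id=' before its loop and uses '|| continue' until a complete record is seen.
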
     8.1 --- a/tools/ioemu/block-raw.c	Wed Jun 20 12:47:52 2007 -0600
     8.2 +++ b/tools/ioemu/block-raw.c	Wed Jun 20 12:49:27 2007 -0600
     8.3 @@ -166,7 +166,7 @@ typedef struct RawAIOCB {
     8.4      struct RawAIOCB *next;
     8.5  } RawAIOCB;
     8.6  
     8.7 -static int aio_sig_num = SIGUSR2;
     8.8 +const int aio_sig_num = SIGUSR2;
     8.9  static RawAIOCB *first_aio; /* AIO issued */
    8.10  static int aio_initialized = 0;
    8.11  
     9.1 --- a/tools/ioemu/target-i386-dm/exec-dm.c	Wed Jun 20 12:47:52 2007 -0600
     9.2 +++ b/tools/ioemu/target-i386-dm/exec-dm.c	Wed Jun 20 12:49:27 2007 -0600
     9.3 @@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz
     9.4   * Forcing a word-sized read/write prevents the guest from seeing a partially
     9.5   * written word-sized atom.
     9.6   */
     9.7 -void memcpy_words(void *dst, void *src, size_t n)
     9.8 +#if defined(__x86_64__) || defined(__i386__)
     9.9 +static void memcpy_words(void *dst, void *src, size_t n)
    9.10  {
    9.11 -    while (n >= sizeof(long)) {
    9.12 -        *((long *)dst) = *((long *)src);
    9.13 -        dst = ((long *)dst) + 1;
    9.14 -        src = ((long *)src) + 1;
    9.15 -        n -= sizeof(long);
    9.16 -    }
    9.17 -
    9.18 -    if (n & 4) {
    9.19 +    asm (
    9.20 +        "   movl %%edx,%%ecx \n"
    9.21 +#ifdef __x86_64
    9.22 +        "   shrl $3,%%ecx    \n"
    9.23 +        "   andl $7,%%edx    \n"
    9.24 +        "   rep  movsq       \n"
    9.25 +        "   test $4,%%edx    \n"
    9.26 +        "   jz   1f          \n"
    9.27 +        "   movsl            \n"
    9.28 +#else /* __i386__ */
    9.29 +        "   shrl $2,%%ecx    \n"
    9.30 +        "   andl $3,%%edx    \n"
    9.31 +        "   rep  movsl       \n"
    9.32 +#endif
    9.33 +        "1: test $2,%%edx    \n"
    9.34 +        "   jz   1f          \n"
    9.35 +        "   movsw            \n"
    9.36 +        "1: test $1,%%edx    \n"
    9.37 +        "   jz   1f          \n"
    9.38 +        "   movsb            \n"
    9.39 +        "1:                  \n"
    9.40 +        : : "S" (src), "D" (dst), "d" (n) : "ecx" );
    9.41 +}
    9.42 +#else
    9.43 +static void memcpy_words(void *dst, void *src, size_t n)
    9.44 +{
    9.45 +    while (n >= sizeof(uint32_t)) {
    9.46          *((uint32_t *)dst) = *((uint32_t *)src);
    9.47          dst = ((uint32_t *)dst) + 1;
    9.48          src = ((uint32_t *)src) + 1;
    9.49 +        n -= sizeof(uint32_t);
    9.50      }
    9.51  
    9.52      if (n & 2) {
    9.53 @@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src, 
    9.54          src = ((uint8_t *)src) + 1;
    9.55      }
    9.56  }
    9.57 +#endif
    9.58  
    9.59  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
    9.60                              int len, int is_write)
    10.1 --- a/tools/ioemu/vl.c	Wed Jun 20 12:47:52 2007 -0600
    10.2 +++ b/tools/ioemu/vl.c	Wed Jun 20 12:49:27 2007 -0600
    10.3 @@ -7059,6 +7059,18 @@ int main(int argc, char **argv)
    10.4  #endif
    10.5  
    10.6      char qemu_dm_logfilename[128];
    10.7 +    
    10.8 +    /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
    10.9 +       then only the threads that use the signal unblock it -- this fixes a
   10.10 +       race condition in Qcow support where the AIO signal is misdelivered.  */
   10.11 +    {
   10.12 +        extern const int aio_sig_num;
   10.13 +        sigset_t set;
   10.14 +
   10.15 +        sigemptyset(&set);
   10.16 +        sigaddset(&set, aio_sig_num);
   10.17 +        sigprocmask(SIG_BLOCK, &set, NULL);
   10.18 +    }
   10.19  
   10.20      LIST_INIT (&vm_change_state_head);
   10.21  #ifndef _WIN32
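
The fix relies on the POSIX rule that a thread created by pthread_create() inherits its creator's signal mask. A minimal standalone sketch of the same pattern (not vl.c code; the aio_thread body is hypothetical):

    #include <pthread.h>
    #include <signal.h>

    static void *aio_thread(void *arg)
    {
            sigset_t set;
            int sig;

            (void)arg;
            sigemptyset(&set);
            sigaddset(&set, SIGUSR2);
            for (;;) {
                    /* Only this thread ever collects SIGUSR2. */
                    sigwait(&set, &sig);
                    /* ... reap AIO completions ... */
            }
            return NULL;
    }

    int main(void)
    {
            sigset_t set;
            pthread_t tid;

            /* Block SIGUSR2 before any thread exists, so every thread
             * starts with it blocked and delivery cannot race. */
            sigemptyset(&set);
            sigaddset(&set, SIGUSR2);
            pthread_sigmask(SIG_BLOCK, &set, NULL);

            pthread_create(&tid, NULL, aio_thread, NULL);
            pthread_join(tid, NULL);
            return 0;
    }

(vl.c uses sigprocmask(), which is equivalent here because the mask is set before any threads are created; in already-multithreaded code pthread_sigmask() is the portable spelling.)
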
    11.1 --- a/tools/libxc/xc_core.c	Wed Jun 20 12:47:52 2007 -0600
    11.2 +++ b/tools/libxc/xc_core.c	Wed Jun 20 12:49:27 2007 -0600
    11.3 @@ -156,7 +156,7 @@ struct xc_core_section_headers {
    11.4      Elf64_Shdr  *shdrs;
    11.5  };
    11.6  #define SHDR_INIT       16
    11.7 -#define SHDR_INC        4
    11.8 +#define SHDR_INC        4U
    11.9  
   11.10  static struct xc_core_section_headers*
   11.11  xc_core_shdr_init(void)
    12.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Wed Jun 20 12:47:52 2007 -0600
    12.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Jun 20 12:49:27 2007 -0600
    12.3 @@ -983,7 +983,7 @@ class XendDomainInfo:
    12.4                  self.info['VCPUs_live'] = vcpus
    12.5                  self._writeDom(self._vcpuDomDetails())
    12.6          else:
    12.7 -            self.info['VCPUs_live'] = vcpus
    12.8 +            self.info['VCPUs_max'] = vcpus
    12.9              xen.xend.XendDomain.instance().managed_config_save(self)
   12.10          log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
   12.11                   vcpus)
    13.1 --- a/tools/python/xen/xend/server/blkif.py	Wed Jun 20 12:47:52 2007 -0600
    13.2 +++ b/tools/python/xen/xend/server/blkif.py	Wed Jun 20 12:49:27 2007 -0600
    13.3 @@ -98,6 +98,11 @@ class BlkifController(DevController):
    13.4  
    13.5          if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
    13.6              dev == new_back['dev'] and mode == 'r'):
    13.7 +            # dummy device
    13.8 +            self.writeBackend(devid,
    13.9 +                              'type', new_back['type'],
   13.10 +                              'params', '')
   13.11 +            # new backend-device
   13.12              self.writeBackend(devid,
   13.13                                'type', new_back['type'],
   13.14                                'params', new_back['params'])
    14.1 --- a/xen/arch/ia64/xen/domain.c	Wed Jun 20 12:47:52 2007 -0600
    14.2 +++ b/xen/arch/ia64/xen/domain.c	Wed Jun 20 12:49:27 2007 -0600
    14.3 @@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st
    14.4  			dom_imva = __va_ul(page_to_maddr(p));
    14.5  			if (filesz > 0) {
    14.6  				if (filesz >= PAGE_SIZE)
    14.7 -					memcpy((void *) dom_imva,
    14.8 -					       (void *) elfaddr,
    14.9 -					       PAGE_SIZE);
   14.10 +					copy_page((void *) dom_imva,
   14.11 +					          (void *) elfaddr);
   14.12  				else {
   14.13  					// copy partial page
   14.14  					memcpy((void *) dom_imva,
   14.15 @@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st
   14.16  			}
   14.17  			else if (memsz > 0) {
   14.18                                  /* always zero out entire page */
   14.19 -				memset((void *) dom_imva, 0, PAGE_SIZE);
   14.20 +				clear_page((void *) dom_imva);
   14.21  			}
   14.22  			memsz -= PAGE_SIZE;
   14.23  			filesz -= PAGE_SIZE;
   14.24 @@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain 
   14.25  	if (start_info_page == NULL)
   14.26  		panic("can't allocate start info page");
   14.27  	si = page_to_virt(start_info_page);
   14.28 -	memset(si, 0, PAGE_SIZE);
   14.29 +	clear_page(si);
   14.30  	snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64",
   14.31  		xen_major_version(), xen_minor_version());
   14.32  	si->nr_pages     = max_pages;
    15.1 --- a/xen/arch/ia64/xen/xenmem.c	Wed Jun 20 12:47:52 2007 -0600
    15.2 +++ b/xen/arch/ia64/xen/xenmem.c	Wed Jun 20 12:49:27 2007 -0600
    15.3 @@ -90,7 +90,7 @@ alloc_dir_page(void)
    15.4  		panic("Not enough memory for virtual frame table!\n");
    15.5  	++table_size;
    15.6  	dir = mfn << PAGE_SHIFT;
    15.7 -	memset(__va(dir), 0, PAGE_SIZE);
    15.8 +	clear_page(__va(dir));
    15.9  	return dir;
   15.10  }
   15.11  
    16.1 --- a/xen/arch/x86/apic.c	Wed Jun 20 12:47:52 2007 -0600
    16.2 +++ b/xen/arch/x86/apic.c	Wed Jun 20 12:49:27 2007 -0600
    16.3 @@ -817,7 +817,7 @@ void __init init_apic_mappings(void)
    16.4       */
    16.5      if (!smp_found_config && detect_init_APIC()) {
    16.6          apic_phys = __pa(alloc_xenheap_page());
    16.7 -        memset(__va(apic_phys), 0, PAGE_SIZE);
    16.8 +        clear_page(__va(apic_phys));
    16.9      } else
   16.10          apic_phys = mp_lapic_addr;
   16.11  
   16.12 @@ -852,7 +852,7 @@ void __init init_apic_mappings(void)
   16.13              } else {
   16.14  fake_ioapic_page:
   16.15                  ioapic_phys = __pa(alloc_xenheap_page());
   16.16 -                memset(__va(ioapic_phys), 0, PAGE_SIZE);
   16.17 +                clear_page(__va(ioapic_phys));
   16.18              }
   16.19              set_fixmap_nocache(idx, ioapic_phys);
   16.20              apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
    17.1 --- a/xen/arch/x86/boot/cmdline.S	Wed Jun 20 12:47:52 2007 -0600
    17.2 +++ b/xen/arch/x86/boot/cmdline.S	Wed Jun 20 12:49:27 2007 -0600
    17.3 @@ -119,30 +119,31 @@ 3:      pop     %edi
    17.4          ret
    17.5  
    17.6  .Lfind_option:
    17.7 +        mov     4(%esp),%eax
    17.8 +        dec     %eax
    17.9          push    %ebx
   17.10 -        push    4+8(%esp)
   17.11 -        push    4+8(%esp)
   17.12 +1:      pushl   4+8(%esp)
   17.13 +        inc     %eax
   17.14 +        push    %eax
   17.15          call    .Lstrstr
   17.16          add     $8,%esp
   17.17          test    %eax,%eax
   17.18          jz      3f
   17.19          cmp     %eax,4+4(%esp)
   17.20 -        je      1f
   17.21 +        je      2f
   17.22          cmpb    $' ',-1(%eax)
   17.23 -        jne     2f
   17.24 -1:      mov     %eax,%ebx
   17.25 -        push    4+8(%esp)
   17.26 +        jne     1b
   17.27 +2:      mov     %eax,%ebx
   17.28 +        pushl   4+8(%esp)
   17.29          call    .Lstrlen
   17.30          add     $4,%esp
   17.31 -        xchg    %eax,%ebx
   17.32 -        add     %eax,%ebx
   17.33 +        xadd    %eax,%ebx
   17.34          cmpb    $'\0',(%ebx)
   17.35          je      3f
   17.36          cmpb    $' ',(%ebx)
   17.37          je      3f
   17.38          cmpb    $'=',(%ebx)
   17.39 -        je      3f
   17.40 -2:      xor     %eax,%eax
   17.41 +        jne     1b
   17.42  3:      pop     %ebx
   17.43          ret
   17.44  
   17.45 @@ -297,7 +298,7 @@ 1:      lodsw
   17.46          call    .Lstr_prefix
   17.47          add     $8,%esp
   17.48          test    %eax,%eax
   17.49 -        jnz     .Lcmdline_exit
   17.50 +        jnz     .Lparse_vga_current
   17.51  
   17.52          /* We have 'vga=mode-<mode>'. */
   17.53          add     $5,%ebx
   17.54 @@ -305,6 +306,19 @@ 1:      lodsw
   17.55          call    .Latoi
   17.56          add     $4,%esp
   17.57          mov     %ax,bootsym_phys(boot_vid_mode)
   17.58 +        jmp     .Lcmdline_exit
   17.59 +
   17.60 +.Lparse_vga_current:
   17.61 +        /* Check for 'vga=current'. */
   17.62 +        push    %ebx
   17.63 +        pushl   $sym_phys(.Lvga_current)
   17.64 +        call    .Lstr_prefix
   17.65 +        add     $8,%esp
   17.66 +        test    %eax,%eax
   17.67 +        jnz     .Lcmdline_exit
   17.68 +
   17.69 +        /* We have 'vga=current'. */
   17.70 +        movw    $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode)
   17.71  
   17.72  .Lcmdline_exit:
   17.73          popa
   17.74 @@ -328,6 +342,8 @@ 1:      lodsw
   17.75          .asciz  "gfx-"
   17.76  .Lvga_mode:
   17.77          .asciz  "mode-"
   17.78 +.Lvga_current:
   17.79 +        .asciz  "current"
   17.80  .Lno_rm_opt:
   17.81          .asciz  "no-real-mode"
   17.82  .Ledid_opt:
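
In C terms, the reworked .Lfind_option now loops over every strstr() match instead of giving up after the first, accepting only whole-word matches. A rendering for clarity (this function does not exist in the tree):

    #include <string.h>

    /* Equivalent of the new .Lfind_option: find opt in cmdline as a
     * whole word, i.e. at the start of the string or after a space,
     * and followed by '\0', ' ' or '='. */
    static const char *find_option(const char *cmdline, const char *opt)
    {
            size_t len = strlen(opt);
            const char *p;

            for (p = strstr(cmdline, opt); p; p = strstr(p + 1, opt)) {
                    if (p != cmdline && p[-1] != ' ')
                            continue;          /* not at a word start */
                    if (p[len] == '\0' || p[len] == ' ' || p[len] == '=')
                            return p;          /* whole-word match */
            }
            return NULL;
    }

The old code returned failure as soon as the first occurrence failed the boundary checks; the new loop keeps searching, so an option name embedded in another option's value no longer masks a genuine later occurrence.
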
    18.1 --- a/xen/arch/x86/boot/trampoline.S	Wed Jun 20 12:47:52 2007 -0600
    18.2 +++ b/xen/arch/x86/boot/trampoline.S	Wed Jun 20 12:49:27 2007 -0600
    18.3 @@ -13,12 +13,11 @@ trampoline_realmode_entry:
    18.4          cli
    18.5          lidt    bootsym(idt_48)
    18.6          lgdt    bootsym(gdt_48)
    18.7 +        mov     $1,%bl                    # EBX != 0 indicates we are an AP
    18.8          xor     %ax, %ax
    18.9          inc     %ax
   18.10          lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
   18.11 -        mov     $1,%bl                    # EBX != 0 indicates we are an AP
   18.12 -        jmp     1f
   18.13 -1:      ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
   18.14 +        ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
   18.15  
   18.16  idt_48: .word   0, 0, 0 # base = limit = 0
   18.17  gdt_48: .word   6*8-1
   18.18 @@ -135,10 +134,9 @@ trampoline_boot_cpu_entry:
   18.19          ljmp    $BOOT_PSEUDORM_CS,$bootsym(1f)
   18.20          .code16
   18.21  1:      mov     %eax,%cr0                 # CR0.PE = 0 (leave protected mode)
   18.22 -        jmp     1f
   18.23  
   18.24          /* Load proper real-mode values into %cs, %ds, %es and %ss. */
   18.25 -1:      ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
   18.26 +        ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
   18.27  1:      mov     $(BOOT_TRAMPOLINE>>4),%ax
   18.28          mov     %ax,%ds
   18.29          mov     %ax,%es
   18.30 @@ -166,10 +164,9 @@ 1:      mov     $(BOOT_TRAMPOLINE>>4),%a
   18.31          xor     %ax,%ax
   18.32          inc     %ax
   18.33          lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
   18.34 -        jmp     1f
   18.35  
   18.36          /* Load proper protected-mode values into all segment registers. */
   18.37 -1:      ljmpl   $BOOT_CS32,$bootsym_phys(1f)
   18.38 +        ljmpl   $BOOT_CS32,$bootsym_phys(1f)
   18.39          .code32
   18.40  1:      mov     $BOOT_DS,%eax
   18.41          mov     %eax,%ds
    19.1 --- a/xen/arch/x86/boot/video.S	Wed Jun 20 12:47:52 2007 -0600
    19.2 +++ b/xen/arch/x86/boot/video.S	Wed Jun 20 12:49:27 2007 -0600
    19.3 @@ -15,7 +15,10 @@
    19.4  
    19.5  #include "video.h"
    19.6  
    19.7 -#define modelist (0x3000)
    19.8 +/* Scratch space layout. */
    19.9 +#define modelist       (0x3000)
   19.10 +#define vesa_glob_info (modelist + 1024)
   19.11 +#define vesa_mode_info (vesa_glob_info + 1024)
   19.12  
   19.13  /* Retrieve Extended Display Identification Data. */
   19.14  #define CONFIG_FIRMWARE_EDID
   19.15 @@ -109,7 +112,7 @@ mopar2: movb    %al, _param(PARAM_VIDEO_
   19.16  
   19.17  # Fetching of VESA frame buffer parameters
   19.18  mopar_gr:
   19.19 -        leaw    modelist+1024, %di
   19.20 +        leaw    vesa_mode_info, %di
   19.21          movb    $0x23, _param(PARAM_HAVE_VGA)
   19.22          movw    16(%di), %ax
   19.23          movw    %ax, _param(PARAM_LFB_LINELENGTH)
   19.24 @@ -128,9 +131,7 @@ mopar_gr:
   19.25          movl    %eax, _param(PARAM_LFB_COLORS+4)
   19.26  
   19.27  # get video mem size
   19.28 -        leaw    modelist+1024, %di
   19.29 -        movw    $0x4f00, %ax
   19.30 -        int     $0x10
   19.31 +        leaw    vesa_glob_info, %di
   19.32          xorl    %eax, %eax
   19.33          movw    18(%di), %ax
   19.34          movl    %eax, _param(PARAM_LFB_SIZE)
   19.35 @@ -183,7 +184,10 @@ dac_done:
   19.36  
   19.37          movw    %es, _param(PARAM_VESAPM_SEG)
   19.38          movw    %di, _param(PARAM_VESAPM_OFF)
   19.39 -no_pm:  ret
   19.40 +
   19.41 +no_pm:  pushw   %ds
   19.42 +        popw    %es
   19.43 +        ret
   19.44  
   19.45  # The video mode menu
   19.46  mode_menu:
   19.47 @@ -428,17 +432,13 @@ setmenu:
   19.48          jmp     mode_set
   19.49  
   19.50  check_vesa:
   19.51 -#ifdef CONFIG_FIRMWARE_EDID
   19.52 -        leaw    modelist+1024, %di
   19.53 +        leaw    vesa_glob_info, %di
   19.54          movw    $0x4f00, %ax
   19.55          int     $0x10
   19.56          cmpw    $0x004f, %ax
   19.57          jnz     setbad
   19.58  
   19.59 -        movw    4(%di), %ax
   19.60 -        movw    %ax, bootsym(vbe_version)
   19.61 -#endif
   19.62 -        leaw    modelist+1024, %di
   19.63 +        leaw    vesa_mode_info, %di
   19.64          subb    $VIDEO_FIRST_VESA>>8, %bh
   19.65          movw    %bx, %cx                # Get mode information structure
   19.66          movw    $0x4f01, %ax
   19.67 @@ -447,7 +447,7 @@ check_vesa:
   19.68          cmpw    $0x004f, %ax
   19.69          jnz     setbad
   19.70  
   19.71 -        movb    (%di), %al              # Check capabilities.
   19.72 +        movb    (%di), %al              # Check mode attributes.
   19.73          andb    $0x99, %al
   19.74          cmpb    $0x99, %al
   19.75          jnz     _setbad                 # Doh! No linear frame buffer.
   19.76 @@ -530,6 +530,7 @@ spec_inits:
   19.77          .word   bootsym(set_8pixel)
   19.78          .word   bootsym(set_80x43)
   19.79          .word   bootsym(set_80x28)
   19.80 +        .word   bootsym(set_current)
   19.81          .word   bootsym(set_80x30)
   19.82          .word   bootsym(set_80x34)
   19.83          .word   bootsym(set_80x60)
   19.84 @@ -575,6 +576,7 @@ set14:  movw    $0x1111, %ax            
   19.85          movb    $0x01, %ah              # Define cursor scan lines 11-12
   19.86          movw    $0x0b0c, %cx
   19.87          int     $0x10
   19.88 +set_current:
   19.89          stc
   19.90          ret
   19.91  
   19.92 @@ -695,33 +697,34 @@ vga_modes_end:
   19.93  # Detect VESA modes.
   19.94  vesa_modes:
   19.95          movw    %di, %bp                # BP=original mode table end
   19.96 -        addw    $0x200, %di             # Buffer space
   19.97 +        leaw    vesa_glob_info, %di
   19.98          movw    $0x4f00, %ax            # VESA Get card info call
   19.99          int     $0x10
  19.100 +        movw    %di, %si
  19.101          movw    %bp, %di
  19.102          cmpw    $0x004f, %ax            # Successful?
  19.103          jnz     ret0
  19.104          
  19.105 -        cmpw    $0x4556, 0x200(%di)     # 'VE'
  19.106 +        cmpw    $0x4556, (%si)          # 'VE'
  19.107          jnz     ret0
  19.108          
  19.109 -        cmpw    $0x4153, 0x202(%di)     # 'SA'
  19.110 +        cmpw    $0x4153, 2(%si)         # 'SA'
  19.111          jnz     ret0
  19.112          
  19.113          movw    $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA VGA"
  19.114          pushw   %gs
  19.115 -        lgsw    0x20e(%di), %si         # GS:SI=mode list
  19.116 +        lgsw    0xe(%si), %si           # GS:SI=mode list
  19.117          movw    $128, %cx               # Iteration limit
  19.118  vesa1:
  19.119          gs;     lodsw
  19.120 -        cmpw    $0xffff, %ax                        # End of the table?
  19.121 +        cmpw    $0xffff, %ax            # End of the table?
  19.122          jz      vesar
  19.123          
  19.124 -        cmpw    $0x0080, %ax                        # Check validity of mode ID
  19.125 +        cmpw    $0x0080, %ax            # Check validity of mode ID
  19.126          jc      vesa2
  19.127          
  19.128 -        orb     %ah, %ah        # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
  19.129 -        jz      vesan                # Certain BIOSes report 0x80-0xff!
  19.130 +        orb     %ah, %ah                # Valid IDs 0x0000-0x007f/0x0100-0x07ff
  19.131 +        jz      vesan                   # Certain BIOSes report 0x80-0xff!
  19.132  
  19.133          cmpw    $0x0800, %ax
  19.134          jnc     vesae
  19.135 @@ -891,8 +894,13 @@ store_edid:
  19.136          cmpb    $1, bootsym(opt_edid)   # EDID disabled on cmdline (edid=no)?
  19.137          je      .Lno_edid
  19.138  
  19.139 -        cmpw    $0x0200, bootsym(vbe_version)  # only do EDID on >= VBE2.0
  19.140 -        jl      .Lno_edid
  19.141 +        leaw    vesa_glob_info, %di
  19.142 +        movw    $0x4f00, %ax
  19.143 +        int     $0x10
  19.144 +        cmpw    $0x004f, %ax
  19.145 +        jne     .Lno_edid
  19.146 +        cmpw    $0x0200, 4(%di)         # only do EDID on >= VBE2.0
  19.147 +        jb      .Lno_edid
  19.148  
  19.149          xorw    %di, %di                # Report Capability
  19.150          pushw   %di
  19.151 @@ -901,6 +909,8 @@ store_edid:
  19.152          xorw    %bx, %bx
  19.153          xorw    %cx, %cx
  19.154          int     $0x10
  19.155 +        pushw   %ds
  19.156 +        popw    %es
  19.157          cmpw    $0x004f, %ax            # Call failed?
  19.158          jne     .Lno_edid
  19.159  
  19.160 @@ -920,8 +930,6 @@ store_edid:
  19.161          movw    $0x01, %bx
  19.162          movw    $0x00, %cx
  19.163          movw    $0x00, %dx
  19.164 -        pushw   %ds
  19.165 -        popw    %es
  19.166          movw    $bootsym(boot_edid_info), %di
  19.167          int     $0x10
  19.168  
  19.169 @@ -940,7 +948,6 @@ edit_buf:       .space  6       # Line e
  19.170  card_name:      .word   0       # Pointer to adapter name
  19.171  graphic_mode:   .byte   0       # Graphic mode with a linear frame buffer
  19.172  dac_size:       .byte   6       # DAC bit depth
  19.173 -vbe_version:    .word   0       # VBE bios version
  19.174  
  19.175  # Status messages
  19.176  keymsg:         .ascii  "Press <RETURN> to see video modes available,"
    20.1 --- a/xen/arch/x86/boot/video.h	Wed Jun 20 12:47:52 2007 -0600
    20.2 +++ b/xen/arch/x86/boot/video.h	Wed Jun 20 12:49:27 2007 -0600
    20.3 @@ -16,10 +16,11 @@
    20.4  #define VIDEO_80x50         0x0f01
    20.5  #define VIDEO_80x43         0x0f02
    20.6  #define VIDEO_80x28         0x0f03
    20.7 -#define VIDEO_80x30         0x0f04
    20.8 -#define VIDEO_80x34         0x0f05
    20.9 -#define VIDEO_80x60         0x0f06
   20.10 -#define VIDEO_LAST_SPECIAL  0x0f07
   20.11 +#define VIDEO_CURRENT_MODE  0x0f04
   20.12 +#define VIDEO_80x30         0x0f05
   20.13 +#define VIDEO_80x34         0x0f06
   20.14 +#define VIDEO_80x60         0x0f07
   20.15 +#define VIDEO_LAST_SPECIAL  0x0f08
   20.16  
   20.17  #define ASK_VGA             0xfffd
   20.18  #define VIDEO_VESA_BY_SIZE  0xffff
    21.1 --- a/xen/arch/x86/boot/x86_32.S	Wed Jun 20 12:47:52 2007 -0600
    21.2 +++ b/xen/arch/x86/boot/x86_32.S	Wed Jun 20 12:49:27 2007 -0600
    21.3 @@ -30,9 +30,7 @@ 1:      mov     %eax,(%edi)
    21.4          loop    1b
    21.5                  
    21.6          /* Pass off the Multiboot info structure to C land. */
    21.7 -        mov     multiboot_ptr,%eax
    21.8 -        add     $__PAGE_OFFSET,%eax
    21.9 -        push    %eax
   21.10 +        pushl   multiboot_ptr
   21.11          call    __start_xen
   21.12          ud2     /* Force a panic (invalid opcode). */
   21.13  
    22.1 --- a/xen/arch/x86/boot/x86_64.S	Wed Jun 20 12:47:52 2007 -0600
    22.2 +++ b/xen/arch/x86/boot/x86_64.S	Wed Jun 20 12:49:27 2007 -0600
    22.3 @@ -51,8 +51,6 @@ 1:      movq    %rax,(%rdi)
    22.4  
    22.5          /* Pass off the Multiboot info structure to C land. */
    22.6          mov     multiboot_ptr(%rip),%edi
    22.7 -        lea     start-0x100000(%rip),%rax
    22.8 -        add     %rax,%rdi
    22.9          call    __start_xen
   22.10          ud2     /* Force a panic (invalid opcode). */
   22.11  
    23.1 --- a/xen/arch/x86/domain.c	Wed Jun 20 12:47:52 2007 -0600
    23.2 +++ b/xen/arch/x86/domain.c	Wed Jun 20 12:49:27 2007 -0600
    23.3 @@ -232,20 +232,19 @@ static int setup_compat_l4(struct vcpu *
    23.4      l4_pgentry_t *l4tab;
    23.5      int rc;
    23.6  
    23.7 -    if ( !pg )
    23.8 +    if ( pg == NULL )
    23.9          return -ENOMEM;
   23.10  
   23.11      /* This page needs to look like a pagetable so that it can be shadowed */
   23.12      pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
   23.13  
   23.14      l4tab = copy_page(page_to_virt(pg), idle_pg_table);
   23.15 +    l4tab[0] = l4e_empty();
   23.16      l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
   23.17          l4e_from_page(pg, __PAGE_HYPERVISOR);
   23.18      l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
   23.19          l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
   23.20                         __PAGE_HYPERVISOR);
   23.21 -    v->arch.guest_table = pagetable_from_page(pg);
   23.22 -    v->arch.guest_table_user = v->arch.guest_table;
   23.23  
   23.24      if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
   23.25      {
   23.26 @@ -253,6 +252,9 @@ static int setup_compat_l4(struct vcpu *
   23.27          return rc;
   23.28      }
   23.29  
   23.30 +    v->arch.guest_table = pagetable_from_page(pg);
   23.31 +    v->arch.guest_table_user = v->arch.guest_table;
   23.32 +
   23.33      return 0;
   23.34  }
   23.35  
   23.36 @@ -318,11 +320,11 @@ int switch_compat(struct domain *d)
   23.37      gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
   23.38      for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
   23.39      {
   23.40 +        if ( (d->vcpu[vcpuid] != NULL) &&
   23.41 +             (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
   23.42 +            goto undo_and_fail;
   23.43          d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
   23.44                                   FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
   23.45 -        if (d->vcpu[vcpuid]
   23.46 -            && setup_compat_l4(d->vcpu[vcpuid]) != 0)
   23.47 -            return -ENOMEM;
   23.48      }
   23.49  
   23.50      d->arch.physaddr_bitsize =
   23.51 @@ -330,6 +332,19 @@ int switch_compat(struct domain *d)
   23.52          + (PAGE_SIZE - 2);
   23.53  
   23.54      return 0;
   23.55 +
   23.56 + undo_and_fail:
   23.57 +    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
   23.58 +    release_arg_xlat_area(d);
   23.59 +    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
   23.60 +    while ( vcpuid-- != 0 )
   23.61 +    {
   23.62 +        if ( d->vcpu[vcpuid] != NULL )
   23.63 +            release_compat_l4(d->vcpu[vcpuid]);
   23.64 +        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
   23.65 +                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
   23.66 +    }
   23.67 +    return -ENOMEM;
   23.68  }
   23.69  
   23.70  #else
   23.71 @@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d)
   23.72          if ( (d->shared_info = alloc_xenheap_page()) == NULL )
   23.73              goto fail;
   23.74  
   23.75 -        memset(d->shared_info, 0, PAGE_SIZE);
   23.76 +        clear_page(d->shared_info);
   23.77          share_xen_page_with_guest(
   23.78              virt_to_page(d->shared_info), d, XENSHARE_writable);
   23.79      }
    24.1 --- a/xen/arch/x86/domain_build.c	Wed Jun 20 12:47:52 2007 -0600
    24.2 +++ b/xen/arch/x86/domain_build.c	Wed Jun 20 12:49:27 2007 -0600
    24.3 @@ -505,7 +505,7 @@ int __init construct_dom0(
    24.4      v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
    24.5  #else
    24.6      l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
    24.7 -    memcpy(l2tab, idle_pg_table, PAGE_SIZE);
    24.8 +    copy_page(l2tab, idle_pg_table);
    24.9      l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   24.10          l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
   24.11      v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
   24.12 @@ -645,7 +645,7 @@ int __init construct_dom0(
   24.13              panic("Not enough RAM for domain 0 PML4.\n");
   24.14          l4start = l4tab = page_to_virt(page);
   24.15      }
   24.16 -    memcpy(l4tab, idle_pg_table, PAGE_SIZE);
   24.17 +    copy_page(l4tab, idle_pg_table);
   24.18      l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
   24.19          l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
   24.20      l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
   24.21 @@ -823,7 +823,7 @@ int __init construct_dom0(
   24.22  
   24.23      /* Set up start info area. */
   24.24      si = (start_info_t *)vstartinfo_start;
   24.25 -    memset(si, 0, PAGE_SIZE);
   24.26 +    clear_page(si);
   24.27      si->nr_pages = nr_pages;
   24.28  
   24.29      si->shared_info = virt_to_maddr(d->shared_info);
    25.1 --- a/xen/arch/x86/flushtlb.c	Wed Jun 20 12:47:52 2007 -0600
    25.2 +++ b/xen/arch/x86/flushtlb.c	Wed Jun 20 12:49:27 2007 -0600
    25.3 @@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3)
    25.4  
    25.5      t = pre_flush();
    25.6  
    25.7 +    hvm_flush_guest_tlbs();
    25.8 +
    25.9  #ifdef USER_MAPPINGS_ARE_GLOBAL
   25.10      __pge_off();
   25.11      __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
   25.12 @@ -103,6 +105,8 @@ void local_flush_tlb(void)
   25.13  
   25.14      t = pre_flush();
   25.15  
   25.16 +    hvm_flush_guest_tlbs();
   25.17 +
   25.18  #ifdef USER_MAPPINGS_ARE_GLOBAL
   25.19      __pge_off();
   25.20      __pge_on();
    26.1 --- a/xen/arch/x86/hvm/hvm.c	Wed Jun 20 12:47:52 2007 -0600
    26.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Jun 20 12:49:27 2007 -0600
    26.3 @@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v
    26.4      hvm_funcs.update_guest_cr3(v);
    26.5  }
    26.6  
    26.7 +static void hvm_latch_shinfo_size(struct domain *d)
    26.8 +{
    26.9 +    /*
   26.10 +     * Called from operations which are among the very first executed by
   26.11 +     * PV drivers on initialisation or after save/restore. These are sensible
   26.12 +     * points at which to sample the execution mode of the guest and latch
   26.13 +     * 32- or 64-bit format for shared state.
   26.14 +     */
   26.15 +    if ( current->domain == d )
   26.16 +        d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
   26.17 +}
   26.18 +
   26.19  /* Initialise a hypercall transfer page for a VMX domain using
   26.20     paravirtualised drivers. */
   26.21  void hvm_hypercall_page_initialise(struct domain *d,
   26.22                                     void *hypercall_page)
   26.23  {
   26.24 +    hvm_latch_shinfo_size(d);
   26.25      hvm_funcs.init_hypercall_page(d, hypercall_page);
   26.26  }
   26.27  
   26.28 @@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
   26.29                  break;
   26.30              case HVM_PARAM_CALLBACK_IRQ:
   26.31                  hvm_set_callback_via(d, a.value);
   26.32 -                /*
   26.33 -                 * Since this operation is one of the very first executed
   26.34 -                 * by PV drivers on initialisation or after save/restore, it
   26.35 -                 * is a sensible point at which to sample the execution mode of
   26.36 -                 * the guest and latch 32- or 64-bit format for shared state.
   26.37 -                 */
   26.38 -                d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
   26.39 +                hvm_latch_shinfo_size(d);
   26.40                  break;
   26.41              }
   26.42              d->arch.hvm_domain.params[a.index] = a.value;
    27.1 --- a/xen/arch/x86/hvm/irq.c	Wed Jun 20 12:47:52 2007 -0600
    27.2 +++ b/xen/arch/x86/hvm/irq.c	Wed Jun 20 12:49:27 2007 -0600
    27.3 @@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain 
    27.4      }
    27.5  }
    27.6  
    27.7 -int cpu_has_pending_irq(struct vcpu *v)
    27.8 +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
    27.9  {
   27.10      struct hvm_domain *plat = &v->domain->arch.hvm_domain;
   27.11  
   27.12 -    /* APIC */
   27.13 -    if ( vlapic_has_interrupt(v) != -1 )
   27.14 -        return 1;
   27.15 +    if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
   27.16 +        return hvm_intack_nmi;
   27.17  
   27.18 -    /* PIC */
   27.19 +    if ( vlapic_has_interrupt(v) != -1 )
   27.20 +        return hvm_intack_lapic;
   27.21 +
   27.22      if ( !vlapic_accept_pic_intr(v) )
   27.23 -        return 0;
   27.24 +        return hvm_intack_none;
   27.25  
   27.26 -    return plat->vpic[0].int_output;
   27.27 +    return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
   27.28  }
   27.29  
   27.30 -int cpu_get_interrupt(struct vcpu *v, int *type)
   27.31 +int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
   27.32  {
   27.33 -    int vector;
   27.34 +    switch ( type )
   27.35 +    {
   27.36 +    case hvm_intack_nmi:
   27.37 +        return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
   27.38 +    case hvm_intack_lapic:
   27.39 +        return ((*vector = cpu_get_apic_interrupt(v)) != -1);
   27.40 +    case hvm_intack_pic:
   27.41 +        ASSERT(v->vcpu_id == 0);
   27.42 +        return ((*vector = cpu_get_pic_interrupt(v)) != -1);
   27.43 +    default:
   27.44 +        break;
   27.45 +    }
   27.46  
   27.47 -    if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
   27.48 -        return vector;
   27.49 -
   27.50 -    if ( (v->vcpu_id == 0) &&
   27.51 -         ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
   27.52 -        return vector;
   27.53 -
   27.54 -    return -1;
   27.55 +    return 0;
   27.56  }
   27.57  
   27.58 -int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
   27.59 +int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
   27.60  {
   27.61      unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
   27.62  
   27.63 -    if ( type == APIC_DM_EXTINT )
   27.64 +    if ( src == hvm_intack_pic )
   27.65          return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
   27.66                  + (isa_irq & 7));
   27.67  
   27.68 +    ASSERT(src == hvm_intack_lapic);
   27.69      return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
   27.70  }
   27.71  
   27.72 @@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
   27.73              domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
   27.74  }
   27.75  
   27.76 -/*
   27.77 - * TODO: 1. Should not need special treatment of event-channel events.
   27.78 - *       2. Should take notice of interrupt shadows (or clear them).
   27.79 - */
   27.80  int hvm_local_events_need_delivery(struct vcpu *v)
   27.81  {
   27.82 -    int pending;
   27.83 +    enum hvm_intack type;
   27.84  
   27.85 -    pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
   27.86 -    if ( unlikely(pending) )
   27.87 -        pending = hvm_interrupts_enabled(v); 
   27.88 +    /* TODO: Get rid of event-channel special case. */
   27.89 +    if ( vcpu_info(v, evtchn_upcall_pending) )
   27.90 +        type = hvm_intack_pic;
   27.91 +    else
   27.92 +        type = hvm_vcpu_has_pending_irq(v);
   27.93  
   27.94 -    return pending;
   27.95 +    if ( likely(type == hvm_intack_none) )
   27.96 +        return 0;
   27.97 +
   27.98 +    return hvm_interrupts_enabled(v, type);
   27.99  }
  27.100  
  27.101  #if 0 /* Keep for debugging */
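
The reworked flow above pivots on enum hvm_intack, defined in a header outside these hunks. A sketch consistent with the callers (NMI is checked first in hvm_vcpu_has_pending_irq(), then LAPIC, then PIC; the exact declaration order and header are assumptions):

    /* Sketch of the assumed declaration, e.g. in asm-x86/hvm/irq.h. */
    enum hvm_intack {
        hvm_intack_none,
        hvm_intack_pic,
        hvm_intack_lapic,
        hvm_intack_nmi
    };
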
    28.1 --- a/xen/arch/x86/hvm/svm/asid.c	Wed Jun 20 12:47:52 2007 -0600
    28.2 +++ b/xen/arch/x86/hvm/svm/asid.c	Wed Jun 20 12:49:27 2007 -0600
    28.3 @@ -60,7 +60,7 @@ struct svm_asid_data {
    28.4     u64 core_asid_generation;
    28.5     u32 next_asid;
    28.6     u32 max_asid;
    28.7 -   u32 erratum170;
    28.8 +   u32 erratum170:1;
    28.9  };
   28.10  
   28.11  static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
   28.12 @@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v)
   28.13  }
   28.14  
   28.15  /*
   28.16 - * Increase the Generation to make free ASIDs.  Flush physical TLB and give
   28.17 - * ASID.
    28.18 + * Increase the generation to make fresh ASIDs available, and indirectly
    28.19 + * cause a TLB flush of all ASIDs on the next VMRUN.
   28.20   */
   28.21 -static void svm_asid_handle_inc_generation(struct vcpu *v)
   28.22 +void svm_asid_inc_generation(void)
   28.23  {
   28.24      struct svm_asid_data *data = svm_asid_core_data();
   28.25  
   28.26 -    if ( likely(data->core_asid_generation <  SVM_ASID_LAST_GENERATION) )
   28.27 +    if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
   28.28      {
   28.29 -        /* Handle ASID overflow. */
    28.30 +        /* Move to the next generation.  We can't flush the TLB now
    28.31 +         * because that can only happen on VMRUN, and current might not
    28.32 +         * be an HVM vcpu; the first HVM vcpu that runs after this
    28.33 +         * will pick up ASID 1 and flush the TLBs. */
   28.34          data->core_asid_generation++;
   28.35 -        data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1;
   28.36 -
   28.37 -        /* Handle VCPU. */
   28.38 -        v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID;
   28.39 -        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
   28.40 -
   28.41 -        /* Trigger flush of physical TLB. */
   28.42 -        v->arch.hvm_svm.vmcb->tlb_control = 1;
   28.43 +        data->next_asid = SVM_ASID_FIRST_GUEST_ASID;
   28.44          return;
   28.45      }
   28.46  
   28.47 @@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati
   28.48       * this core (flushing TLB always). So correctness is established; it
   28.49       * only runs a bit slower.
   28.50       */
   28.51 -    printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
   28.52 -    data->erratum170 = 1;
   28.53 -    data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
   28.54 -
   28.55 -    svm_asid_init_vcpu(v);
   28.56 +    if ( !data->erratum170 )
   28.57 +    {
   28.58 +        printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
   28.59 +        data->erratum170 = 1;
   28.60 +        data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
   28.61 +    }
   28.62  }
   28.63  
   28.64  /*
   28.65 @@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo
   28.66          return;
   28.67      }
   28.68  
   28.69 -    /* Different ASID generations trigger fetching of a fresh ASID. */
   28.70 -    if ( likely(data->next_asid <= data->max_asid) )
   28.71 -    {
   28.72 -        /* There is a free ASID. */
   28.73 -        v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
   28.74 -        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
    28.75 +    /* If there are no free ASIDs, move to a new generation. */
   28.76 +    if ( unlikely(data->next_asid > data->max_asid) )
   28.77 +        svm_asid_inc_generation();
   28.78 +
   28.79 +    /* Now guaranteed to be a free ASID. */
   28.80 +    v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
   28.81 +    v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
   28.82 +
    28.83 +    /* When we assign ASID 1, flush all TLB entries.  We must do it
    28.84 +     * here because svm_asid_inc_generation() can be called at any time,
    28.85 +     * but the TLB flush can only happen on VMRUN. */
   28.86 +    if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID )
   28.87 +        v->arch.hvm_svm.vmcb->tlb_control = 1;
   28.88 +    else
   28.89          v->arch.hvm_svm.vmcb->tlb_control = 0;
   28.90 -        return;
   28.91 -    }
   28.92 -
   28.93 -    /* Slow path, may cause TLB flush. */
   28.94 -    svm_asid_handle_inc_generation(v);
   28.95  }
   28.96  
   28.97  void svm_asid_inv_asid(struct vcpu *v)
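
For context, the fast path at the top of svm_asid_handle_vmrun() is elided from the hunk above; reconstructed as a sketch (field names taken from the visible code, exact form assumed):

    /* Sketch: a vcpu whose generation still matches the core's keeps its
     * ASID and cancels any previously requested flush. */
    if ( v->arch.hvm_svm.asid_generation == data->core_asid_generation )
    {
        v->arch.hvm_svm.vmcb->tlb_control = 0;   /* ASID still valid */
        return;
    }

This is what makes svm_asid_inc_generation() a cheap flush primitive: it only invalidates every vcpu's cached generation, so the next VMRUN falls through to the slow path, picks up ASID 1, and sets tlb_control.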
    29.1 --- a/xen/arch/x86/hvm/svm/intr.c	Wed Jun 20 12:47:52 2007 -0600
    29.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Wed Jun 20 12:49:27 2007 -0600
    29.3 @@ -15,7 +15,6 @@
    29.4   * You should have received a copy of the GNU General Public License along with
    29.5   * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    29.6   * Place - Suite 330, Boston, MA 02111-1307 USA.
    29.7 - *
    29.8   */
    29.9  
   29.10  #include <xen/config.h>
   29.11 @@ -39,100 +38,119 @@
   29.12  #include <xen/domain_page.h>
   29.13  #include <asm/hvm/trace.h>
   29.14  
   29.15 -/*
   29.16 - * Most of this code is copied from vmx_io.c and modified 
   29.17 - * to be suitable for SVM.
   29.18 - */
   29.19 -
   29.20 -static inline int svm_inject_extint(struct vcpu *v, int trap)
   29.21 +static void svm_inject_dummy_vintr(struct vcpu *v)
   29.22  {
   29.23      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   29.24      vintr_t intr = vmcb->vintr;
   29.25  
   29.26 -    /* Update only relevant fields */    
   29.27      intr.fields.irq = 1;
   29.28      intr.fields.intr_masking = 1;
   29.29 -    intr.fields.vector = trap;
   29.30 +    intr.fields.vector = 0;
   29.31      intr.fields.prio = 0xF;
   29.32      intr.fields.ign_tpr = 1;
   29.33      vmcb->vintr = intr;
   29.34 +}
   29.35 +    
   29.36 +static void svm_inject_nmi(struct vcpu *v)
   29.37 +{
   29.38 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   29.39 +    eventinj_t event;
   29.40  
   29.41 -    return 0;
   29.42 +    event.bytes = 0;
   29.43 +    event.fields.v = 1;
   29.44 +    event.fields.type = EVENTTYPE_NMI;
   29.45 +    event.fields.vector = 2;
   29.46 +
   29.47 +    ASSERT(vmcb->eventinj.fields.v == 0);
   29.48 +    vmcb->eventinj = event;
   29.49 +}
   29.50 +    
   29.51 +static void svm_inject_extint(struct vcpu *v, int vector)
   29.52 +{
   29.53 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   29.54 +    eventinj_t event;
   29.55 +
   29.56 +    event.bytes = 0;
   29.57 +    event.fields.v = 1;
   29.58 +    event.fields.type = EVENTTYPE_INTR;
   29.59 +    event.fields.vector = vector;
   29.60 +
   29.61 +    ASSERT(vmcb->eventinj.fields.v == 0);
   29.62 +    vmcb->eventinj = event;
   29.63  }
   29.64      
   29.65  asmlinkage void svm_intr_assist(void) 
   29.66  {
   29.67      struct vcpu *v = current;
   29.68      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   29.69 -    int intr_type = APIC_DM_EXTINT;
   29.70 -    int intr_vector = -1;
   29.71 +    enum hvm_intack intr_source;
   29.72 +    int intr_vector;
   29.73  
   29.74      /*
   29.75 -     * Previous Interrupt delivery caused this intercept?
   29.76 +     * Previous event delivery caused this intercept?
   29.77       * This will happen if the injection is latched by the processor (hence
   29.78 -     * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
   29.79 -     * due to lack of shadow mapping of guest IDT or guest-kernel stack).
   29.80 -     * 
   29.81 -     * NB. Exceptions that fault during delivery are lost. This needs to be
   29.82 -     * fixed but we'll usually get away with it since faults are usually
   29.83 -     * idempotent. But this isn't the case for e.g. software interrupts!
   29.84 +     * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
   29.85 +     * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
   29.86 +     * stack).
   29.87       */
   29.88 -    if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
   29.89 +    if ( vmcb->exitintinfo.fields.v )
   29.90      {
   29.91 -        intr_vector = vmcb->exitintinfo.fields.vector;
   29.92 +        vmcb->eventinj = vmcb->exitintinfo;
   29.93          vmcb->exitintinfo.bytes = 0;
   29.94 -        HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
   29.94 +        HVMTRACE_1D(REINJ_VIRQ, v, vmcb->eventinj.fields.vector);
   29.95 -        svm_inject_extint(v, intr_vector);
   29.96          return;
   29.97      }
   29.98  
   29.99 -    /*
  29.100 -     * Previous interrupt still pending? This occurs if we return from VMRUN
  29.101 -     * very early in the entry-to-guest process. Usually this is because an
  29.102 -     * external physical interrupt was pending when we executed VMRUN.
  29.103 -     */
  29.104 -    if ( vmcb->vintr.fields.irq )
  29.105 -        return;
  29.106 -
  29.107 -    /* Crank the handle on interrupt state and check for new interrrupts. */
  29.108 +    /* Crank the handle on interrupt state. */
  29.109      pt_update_irq(v);
  29.110      hvm_set_callback_irq_level();
  29.111 -    if ( !cpu_has_pending_irq(v) )
  29.112 -        return;
  29.113 +
  29.114 +    do {
  29.115 +        intr_source = hvm_vcpu_has_pending_irq(v);
  29.116 +        if ( likely(intr_source == hvm_intack_none) )
  29.117 +            return;
  29.118  
  29.119 -    /*
  29.120 -     * If the guest can't take an interrupt right now, create a 'fake'
  29.121 -     * virtual interrupt on to intercept as soon as the guest _can_ take
  29.122 -     * interrupts.  Do not obtain the next interrupt from the vlapic/pic
  29.123 -     * if unable to inject.
  29.124 -     *
  29.125 -     * Also do this if there is an exception pending.  This is because
  29.126 -     * the delivery of the exception can arbitrarily delay the injection
  29.127 -     * of the vintr (for example, if the exception is handled via an
  29.128 -     * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
  29.129 -     * - the vTPR could be modified upwards, so we need to wait until the
  29.130 -     *   exception is delivered before we can safely decide that an
  29.131 -     *   interrupt is deliverable; and
  29.132 -     * - the guest might look at the APIC/PIC state, so we ought not to have 
  29.133 -     *   cleared the interrupt out of the IRR.
  29.134 -     */
  29.135 -    if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow 
  29.136 -         || vmcb->eventinj.fields.v )  
  29.137 +        /*
   29.138 +         * If the guest can't take an interrupt right now, inject a 'fake'
   29.139 +         * virtual interrupt and intercept it as soon as the guest _can_ take
   29.140 +         * interrupts.  Do not obtain the next interrupt from the vlapic/pic
  29.141 +         * if unable to inject.
  29.142 +         *
  29.143 +         * Also do this if there is an injection already pending. This is
  29.144 +         * because the event delivery can arbitrarily delay the injection
  29.145 +         * of the vintr (for example, if the exception is handled via an
  29.146 +         * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
  29.147 +         * - the vTPR could be modified upwards, so we need to wait until the
  29.148 +         *   exception is delivered before we can safely decide that an
  29.149 +         *   interrupt is deliverable; and
  29.150 +         * - the guest might look at the APIC/PIC state, so we ought not to
  29.151 +         *   have cleared the interrupt out of the IRR.
  29.152 +         *
  29.153 +         * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
  29.154 +         * shadow. This is hard to do without hardware support. We should also
  29.155 +         * track 'NMI blocking' from NMI injection until IRET. This can be done
  29.156 +         * quite easily in software by intercepting the unblocking IRET.
  29.157 +         */
  29.158 +        if ( !hvm_interrupts_enabled(v, intr_source) ||
  29.159 +             vmcb->eventinj.fields.v )
  29.160 +        {
  29.161 +            vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
  29.162 +            HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
  29.163 +            svm_inject_dummy_vintr(v);
  29.164 +            return;
  29.165 +        }
  29.166 +    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
  29.167 +
  29.168 +    if ( intr_source == hvm_intack_nmi )
  29.169      {
  29.170 -        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
  29.171 -        HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
  29.172 -        svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
  29.173 -        return;
  29.174 +        svm_inject_nmi(v);
  29.175      }
  29.176 -
  29.177 -    /* Okay, we can deliver the interrupt: grab it and update PIC state. */
  29.178 -    intr_vector = cpu_get_interrupt(v, &intr_type);
  29.179 -    BUG_ON(intr_vector < 0);
  29.180 -
  29.181 -    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
  29.182 -    svm_inject_extint(v, intr_vector);
  29.183 -
  29.184 -    pt_intr_post(v, intr_vector, intr_type);
  29.185 +    else
  29.186 +    {
  29.187 +        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
  29.188 +        svm_inject_extint(v, intr_vector);
  29.189 +        pt_intr_post(v, intr_vector, intr_source);
  29.190 +    }
  29.191  }
  29.192  
  29.193  /*
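
svm_inject_nmi() and svm_inject_extint() above program the VMCB's EVENTINJ field through eventinj_t, which is defined in vmcb.h rather than in this changeset. A sketch of the layout as described by the AMD SVM architecture (field widths from the spec; the Xen field names are inferred from the usage above):

    /* Sketch of the assumed eventinj_t layout (AMD EVENTINJ format). */
    typedef union {
        u64 bytes;
        struct {
            u64 vector:     8;  /* 2 for NMI, the IRQ vector for INTR */
            u64 type:       3;  /* EVENTTYPE_INTR=0, EVENTTYPE_NMI=2, ... */
            u64 ev:         1;  /* error code valid */
            u64 resvd1:    19;
            u64 v:          1;  /* valid -- what the ASSERTs above check */
            u64 errorcode: 32;
        } fields;
    } eventinj_t;
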
    30.1 --- a/xen/arch/x86/hvm/svm/svm.c	Wed Jun 20 12:47:52 2007 -0600
    30.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Wed Jun 20 12:49:27 2007 -0600
    30.3 @@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
    30.4      c->sysenter_esp = vmcb->sysenter_esp;
    30.5      c->sysenter_eip = vmcb->sysenter_eip;
    30.6  
    30.7 -    /* Save any event/interrupt that was being injected when we last
    30.8 -     * exited.  Although there are three(!) VMCB fields that can contain
    30.9 -     * active events, we only need to save at most one: because the
   30.10 -     * intr_assist logic never delivers an IRQ when any other event is
   30.11 -     * active, we know that the only possible collision is if we inject
   30.12 -     * a fault while exitintinfo contains a valid event (the delivery of
   30.13 -     * which caused the last exit).  In that case replaying just the
   30.14 -     * first event should cause the same behaviour when we restore. */
   30.15 -    if ( vmcb->vintr.fields.irq 
   30.16 -         && /* Check it's not a fake interrupt (see svm_intr_assist()) */
   30.17 -         !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
   30.18 -    {
   30.19 -        c->pending_vector = vmcb->vintr.fields.vector;
   30.20 -        c->pending_type = 0; /* External interrupt */
   30.21 -        c->pending_error_valid = 0;
   30.22 -        c->pending_reserved = 0;
   30.23 -        c->pending_valid = 1;
   30.24 -        c->error_code = 0;
   30.25 -    }
   30.26 -    else if ( vmcb->exitintinfo.fields.v )
   30.27 +    /* Save any event/interrupt that was being injected when we last exited. */
   30.28 +    if ( vmcb->exitintinfo.fields.v )
   30.29      {
   30.30          c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
   30.31          c->error_code = vmcb->exitintinfo.fields.errorcode;
   30.32 @@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
   30.33          __restore_debug_registers(v);
   30.34  }
   30.35  
   30.36 -static int svm_interrupts_enabled(struct vcpu *v)
   30.37 +static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
   30.38  {
   30.39 -    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
   30.40 -    return !irq_masked(eflags); 
   30.41 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   30.42 +
   30.43 +    if ( type == hvm_intack_nmi )
   30.44 +        return !vmcb->interrupt_shadow;
   30.45 +
   30.46 +    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
   30.47 +    return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow; 
   30.48  }
   30.49  
   30.50  static int svm_guest_x86_mode(struct vcpu *v)
   30.51 @@ -598,6 +585,14 @@ static void svm_update_guest_cr3(struct 
   30.52      v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
   30.53  }
   30.54  
   30.55 +static void svm_flush_guest_tlbs(void)
   30.56 +{
   30.57 +    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
   30.58 +     * next VMRUN.  (If ASIDs are disabled, the whole TLB is flushed on
   30.59 +     * VMRUN anyway). */
   30.60 +    svm_asid_inc_generation();
   30.61 +}
   30.62 +
   30.63  static void svm_update_vtpr(struct vcpu *v, unsigned long value)
   30.64  {
   30.65      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   30.66 @@ -771,8 +766,6 @@ static void svm_init_hypercall_page(stru
   30.67      char *p;
   30.68      int i;
   30.69  
   30.70 -    memset(hypercall_page, 0, PAGE_SIZE);
   30.71 -
   30.72      for ( i = 0; i < (PAGE_SIZE / 32); i++ )
   30.73      {
   30.74          p = (char *)(hypercall_page + (i * 32));
   30.75 @@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun
   30.76      .get_segment_register = svm_get_segment_register,
   30.77      .update_host_cr3      = svm_update_host_cr3,
   30.78      .update_guest_cr3     = svm_update_guest_cr3,
   30.79 +    .flush_guest_tlbs     = svm_flush_guest_tlbs,
   30.80      .update_vtpr          = svm_update_vtpr,
   30.81      .stts                 = svm_stts,
   30.82      .set_tsc_offset       = svm_set_tsc_offset,
   30.83 @@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun
   30.84      .event_injection_faulted = svm_event_injection_faulted
   30.85  };
   30.86  
   30.87 -void svm_npt_detect(void)
   30.88 +static void svm_npt_detect(void)
   30.89  {
   30.90      u32 eax, ebx, ecx, edx;
   30.91  
   30.92 @@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c)
   30.93  
   30.94      hvm_enable(&svm_function_table);
   30.95  
   30.96 +    if ( opt_hap_enabled )
   30.97 +        printk("SVM: Nested paging enabled.\n");
   30.98 +        
   30.99      return 1;
  30.100  }
  30.101  
  30.102 @@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc
  30.103  
  30.104      /* Copy current guest state into io instruction state structure. */
  30.105      memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
  30.106 -    hvm_store_cpu_guest_regs(v, regs, NULL);
  30.107 +    svm_store_cpu_guest_regs(v, regs, NULL);
  30.108  
  30.109      info.bytes = vmcb->exitinfo1;
  30.110  
  30.111 @@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access(
  30.112  
  30.113  static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
  30.114  {
  30.115 +    enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
  30.116 +
  30.117      __update_guest_eip(vmcb, 1);
  30.118  
  30.119      /* Check for interrupt not handled or new interrupt. */
  30.120 -    if ( (vmcb->rflags & X86_EFLAGS_IF) &&
  30.121 -         (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
  30.122 +    if ( vmcb->eventinj.fields.v ||
  30.123 +         ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
  30.124 +    {
  30.125          HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
  30.126          return;
  30.127      }
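
svm_interrupts_enabled() now takes the interrupt source because the blocking rules differ: an NMI is held off only by an interrupt shadow, while PIC/LAPIC interrupts also honour EFLAGS.IF. The generic hvm_interrupts_enabled(v, type) used by the common code is presumably a thin dispatch through the function table; a sketch (location and exact form assumed):

    /* Sketch: common wrapper over the per-vendor predicate. */
    static inline int hvm_interrupts_enabled(struct vcpu *v,
                                             enum hvm_intack type)
    {
        return hvm_funcs.interrupts_enabled(v, type);
    }
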
    31.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Wed Jun 20 12:47:52 2007 -0600
    31.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Wed Jun 20 12:49:27 2007 -0600
    31.3 @@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void)
    31.4          return NULL;
    31.5      }
    31.6  
    31.7 -    memset(vmcb, 0, PAGE_SIZE);
    31.8 +    clear_page(vmcb);
    31.9      return vmcb;
   31.10  }
   31.11  
   31.12 @@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a
   31.13      hsa = alloc_xenheap_page();
   31.14      if ( hsa == NULL )
   31.15      {
   31.16 -        printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n");
   31.17 +        printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n");
   31.18          return NULL;
   31.19      }
   31.20  
   31.21 -    memset(hsa, 0, PAGE_SIZE);
   31.22 +    clear_page(hsa);
   31.23      return hsa;
   31.24  }
   31.25  
    32.1 --- a/xen/arch/x86/hvm/vioapic.c	Wed Jun 20 12:47:52 2007 -0600
    32.2 +++ b/xen/arch/x86/hvm/vioapic.c	Wed Jun 20 12:49:27 2007 -0600
    32.3 @@ -254,17 +254,11 @@ static void ioapic_inj_irq(
    32.4      HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
    32.5                  vector, trig_mode, delivery_mode);
    32.6  
    32.7 -    switch ( delivery_mode )
    32.8 -    {
    32.9 -    case dest_Fixed:
   32.10 -    case dest_LowestPrio:
   32.11 -        if ( vlapic_set_irq(target, vector, trig_mode) )
   32.12 -            vcpu_kick(vlapic_vcpu(target));
   32.13 -        break;
   32.14 -    default:
   32.15 -        gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
   32.16 -        break;
   32.17 -    }
   32.18 +    ASSERT((delivery_mode == dest_Fixed) ||
   32.19 +           (delivery_mode == dest_LowestPrio));
   32.20 +
   32.21 +    if ( vlapic_set_irq(target, vector, trig_mode) )
   32.22 +        vcpu_kick(vlapic_vcpu(target));
   32.23  }
   32.24  
   32.25  static uint32_t ioapic_get_delivery_bitmask(
   32.26 @@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
   32.27      }
   32.28  
   32.29      case dest_Fixed:
   32.30 -    case dest_ExtINT:
   32.31      {
   32.32          uint8_t bit;
   32.33          for ( bit = 0; deliver_bitmask != 0; bit++ )
   32.34 @@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
   32.35          break;
   32.36      }
   32.37  
   32.38 -    case dest_SMI:
   32.39      case dest_NMI:
   32.40 -    case dest_INIT:
   32.41 -    case dest__reserved_2:
   32.42 +    {
   32.43 +        uint8_t bit;
   32.44 +        for ( bit = 0; deliver_bitmask != 0; bit++ )
   32.45 +        {
   32.46 +            if ( !(deliver_bitmask & (1 << bit)) )
   32.47 +                continue;
   32.48 +            deliver_bitmask &= ~(1 << bit);
   32.49 +            if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
   32.50 +                 !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
   32.51 +                vcpu_kick(v);
   32.52 +        }
   32.53 +        break;
   32.54 +    }
   32.55 +
   32.56      default:
   32.57          gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
   32.58                   delivery_mode);
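
The NMI delivery added above latches at most one NMI per vcpu: nmi_pending is a single flag rather than a queue, matching real hardware, where an NMI arriving while another is pending collapses into it. The producer/consumer pairing (producers here and in vlapic.c below, consumer in hvm_vcpu_ack_pending_irq() in irq.c above):

    /* Producer: kick the vcpu only on the 0 -> 1 edge of the latch. */
    if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
        vcpu_kick(v);

    /* Consumer: deliver exactly one NMI per latched edge. */
    if ( test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending) )
        inject_nmi(v);    /* illustrative name for the vendor injector */
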
    33.1 --- a/xen/arch/x86/hvm/vlapic.c	Wed Jun 20 12:47:52 2007 -0600
    33.2 +++ b/xen/arch/x86/hvm/vlapic.c	Wed Jun 20 12:49:27 2007 -0600
    33.3 @@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
    33.4          break;
    33.5  
    33.6      case APIC_DM_NMI:
    33.7 -        gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
    33.8 +        if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
    33.9 +            vcpu_kick(v);
   33.10          break;
   33.11  
   33.12      case APIC_DM_INIT:
   33.13 @@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
   33.14      return highest_irr;
   33.15  }
   33.16  
   33.17 -int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
   33.18 +int cpu_get_apic_interrupt(struct vcpu *v)
   33.19  {
   33.20      int vector = vlapic_has_interrupt(v);
   33.21      struct vlapic *vlapic = vcpu_vlapic(v);
   33.22 @@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *
   33.23   
   33.24      vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
   33.25      vlapic_clear_irr(vector, vlapic);
   33.26 -
   33.27 -    *mode = APIC_DM_FIXED;
   33.28      return vector;
   33.29  }
   33.30  
   33.31 @@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v)
   33.32  	return -ENOMEM;
   33.33      }
   33.34  
   33.35 -    memset(vlapic->regs, 0, PAGE_SIZE);
   33.36 +    clear_page(vlapic->regs);
   33.37  
   33.38      vlapic_reset(vlapic);
   33.39  
    34.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Wed Jun 20 12:47:52 2007 -0600
    34.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Wed Jun 20 12:49:27 2007 -0600
    34.3 @@ -102,8 +102,8 @@ static void update_tpr_threshold(struct 
    34.4  
    34.5  asmlinkage void vmx_intr_assist(void)
    34.6  {
    34.7 -    int has_ext_irq, intr_vector, intr_type = 0;
    34.8 -    unsigned long eflags, intr_shadow;
    34.9 +    int intr_vector;
   34.10 +    enum hvm_intack intr_source;
   34.11      struct vcpu *v = current;
   34.12      unsigned int idtv_info_field;
   34.13      unsigned long inst_len;
   34.14 @@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)
   34.15  
   34.16      update_tpr_threshold(vcpu_vlapic(v));
   34.17  
   34.18 -    has_ext_irq = cpu_has_pending_irq(v);
   34.19 +    do {
   34.20 +        intr_source = hvm_vcpu_has_pending_irq(v);
   34.21 +
   34.22 +        if ( unlikely(v->arch.hvm_vmx.vector_injected) )
   34.23 +        {
   34.24 +            v->arch.hvm_vmx.vector_injected = 0;
   34.25 +            if ( unlikely(intr_source != hvm_intack_none) )
   34.26 +                enable_irq_window(v);
   34.27 +            return;
   34.28 +        }
   34.29 +
   34.30 +        /* This could be moved earlier in the VMX resume sequence. */
   34.31 +        idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
   34.32 +        if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
   34.33 +        {
   34.34 +            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
   34.35  
   34.36 -    if ( unlikely(v->arch.hvm_vmx.vector_injected) )
   34.37 -    {
   34.38 -        v->arch.hvm_vmx.vector_injected = 0;
   34.39 -        if ( unlikely(has_ext_irq) )
   34.40 -            enable_irq_window(v);
   34.41 -        return;
   34.42 -    }
   34.43 +            /*
   34.44 +             * Safe: the length will only be interpreted for software
   34.45 +             * exceptions and interrupts. If we get here then delivery of some
   34.46 +             * event caused a fault, and this always results in defined
   34.47 +             * VM_EXIT_INSTRUCTION_LEN.
   34.48 +             */
   34.49 +            inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
   34.50 +            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
   34.51  
   34.52 -    /* This could be moved earlier in the VMX resume sequence. */
   34.53 -    idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
   34.54 -    if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
   34.55 -    {
   34.56 -        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
   34.57 +            if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
   34.58 +                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
   34.59 +                          __vmread(IDT_VECTORING_ERROR_CODE));
   34.60 +            if ( unlikely(intr_source != hvm_intack_none) )
   34.61 +                enable_irq_window(v);
   34.62 +
   34.63 +            HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
   34.64 +            return;
   34.65 +        }
   34.66 +
   34.67 +        if ( likely(intr_source == hvm_intack_none) )
   34.68 +            return;
   34.69  
   34.70          /*
   34.71 -         * Safe: the length will only be interpreted for software exceptions
   34.72 -         * and interrupts. If we get here then delivery of some event caused a
   34.73 -         * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
   34.74 +         * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
   34.75 +         * should wait for exit from 'NMI blocking' window (NMI injection to
   34.76 +         * next IRET). This requires us to use the new 'virtual NMI' support.
   34.77           */
   34.78 -        inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
   34.79 -        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
   34.80 -
   34.81 -        if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
   34.82 -            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
   34.83 -                      __vmread(IDT_VECTORING_ERROR_CODE));
   34.84 -        if ( unlikely(has_ext_irq) )
   34.85 +        if ( !hvm_interrupts_enabled(v, intr_source) )
   34.86 +        {
   34.87              enable_irq_window(v);
   34.88 -
   34.89 -        HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
   34.90 -        return;
   34.91 -    }
   34.92 -
   34.93 -    if ( likely(!has_ext_irq) )
   34.94 -        return;
   34.95 +            return;
   34.96 +        }
   34.97 +    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
   34.98  
   34.99 -    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
  34.100 -    if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
  34.101 -    {
  34.102 -        enable_irq_window(v);
  34.103 -        HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
  34.104 -        return;
  34.105 -    }
  34.106 -
  34.107 -    eflags = __vmread(GUEST_RFLAGS);
  34.108 -    if ( irq_masked(eflags) )
  34.109 +    if ( intr_source == hvm_intack_nmi )
  34.110      {
  34.111 -        enable_irq_window(v);
  34.112 -        return;
  34.113 +        vmx_inject_nmi(v);
  34.114      }
  34.115 -
  34.116 -    intr_vector = cpu_get_interrupt(v, &intr_type);
  34.117 -    BUG_ON(intr_vector < 0);
  34.118 -
  34.119 -    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
  34.120 -    vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
  34.121 -
  34.122 -    pt_intr_post(v, intr_vector, intr_type);
  34.123 +    else
  34.124 +    {
  34.125 +        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
  34.126 +        vmx_inject_extint(v, intr_vector);
  34.127 +        pt_intr_post(v, intr_vector, intr_source);
  34.128 +    }
  34.129  }
  34.130  
  34.131  /*
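
vmx_inject_nmi(), called above, is defined outside these hunks. Per the VMX architecture it must write the VM-entry interruption-information field with valid=1, interruption type NMI (2) and vector 2; a sketch (the literal encoding is spelled out because the Xen constant names are not shown in this changeset):

    /* Sketch: program VM-entry event injection for an NMI. */
    static void vmx_inject_nmi_sketch(struct vcpu *v)
    {
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD,
                  (1u << 31) |   /* valid */
                  (2u << 8)  |   /* interruption type: NMI */
                  2u);           /* vector 2 */
    }
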
    35.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Wed Jun 20 12:47:52 2007 -0600
    35.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Wed Jun 20 12:49:27 2007 -0600
    35.3 @@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc
    35.4          return NULL;
    35.5      }
    35.6  
    35.7 -    memset(vmcs, 0, PAGE_SIZE);
    35.8 +    clear_page(vmcs);
    35.9      vmcs->vmcs_revision_id = vmcs_revision_id;
   35.10  
   35.11      return vmcs;
    36.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 20 12:47:52 2007 -0600
    36.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 20 12:49:27 2007 -0600
    36.3 @@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru
    36.4      char *p;
    36.5      int i;
    36.6  
    36.7 -    memset(hypercall_page, 0, PAGE_SIZE);
    36.8 -
    36.9      for ( i = 0; i < (PAGE_SIZE / 32); i++ )
   36.10      {
   36.11          p = (char *)(hypercall_page + (i * 32));
   36.12 @@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v
   36.13      return v->arch.hvm_vmx.efer & EFER_NX;
   36.14  }
   36.15  
   36.16 -static int vmx_interrupts_enabled(struct vcpu *v) 
   36.17 +static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
   36.18  {
   36.19 -    unsigned long eflags = __vmread(GUEST_RFLAGS); 
   36.20 -    return !irq_masked(eflags); 
   36.21 +    unsigned long intr_shadow, eflags;
   36.22 +
   36.23 +    ASSERT(v == current);
   36.24 +
   36.25 +    intr_shadow  = __vmread(GUEST_INTERRUPTIBILITY_INFO);
   36.26 +    intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
   36.27 +
   36.28 +    if ( type == hvm_intack_nmi )
   36.29 +        return !intr_shadow;
   36.30 +
   36.31 +    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
   36.32 +    eflags = __vmread(GUEST_RFLAGS);
   36.33 +    return !irq_masked(eflags) && !intr_shadow;
   36.34  }
   36.35  
   36.36 -
   36.37  static void vmx_update_host_cr3(struct vcpu *v)
   36.38  {
   36.39 -    ASSERT( (v == current) || !vcpu_runnable(v) );
   36.40 +    ASSERT((v == current) || !vcpu_runnable(v));
   36.41      vmx_vmcs_enter(v);
   36.42      __vmwrite(HOST_CR3, v->arch.cr3);
   36.43      vmx_vmcs_exit(v);
   36.44 @@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v
   36.45  
   36.46  static void vmx_update_guest_cr3(struct vcpu *v)
   36.47  {
   36.48 -    ASSERT( (v == current) || !vcpu_runnable(v) );
   36.49 +    ASSERT((v == current) || !vcpu_runnable(v));
   36.50      vmx_vmcs_enter(v);
   36.51      __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
   36.52      vmx_vmcs_exit(v);
   36.53  }
   36.54  
   36.55 +static void vmx_flush_guest_tlbs(void)
   36.56 +{
   36.57 +    /* No tagged TLB support on VMX yet.  The fact that we're in Xen 
   36.58 +     * at all means any guest will have a clean TLB when it's next run,
   36.59 +     * because VMRESUME will flush it for us. */
   36.60 +}
   36.61  
   36.62  static void vmx_inject_exception(
   36.63      unsigned int trapnr, int errcode, unsigned long cr2)
   36.64 @@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun
   36.65      .get_segment_register = vmx_get_segment_register,
   36.66      .update_host_cr3      = vmx_update_host_cr3,
   36.67      .update_guest_cr3     = vmx_update_guest_cr3,
   36.68 +    .flush_guest_tlbs     = vmx_flush_guest_tlbs,
   36.69      .update_vtpr          = vmx_update_vtpr,
   36.70      .stts                 = vmx_stts,
   36.71      .set_tsc_offset       = vmx_set_tsc_offset,
   36.72 @@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned 
   36.73  
   36.74      /* Copy current guest state into io instruction state structure. */
   36.75      memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
   36.76 -    hvm_store_cpu_guest_regs(current, regs, NULL);
   36.77 +    vmx_store_cpu_guest_regs(current, regs, NULL);
   36.78  
   36.79      HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
   36.80                  "exit_qualification = %lx",
   36.81 @@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct
   36.82  
   36.83      HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
   36.84  
   36.85 -    switch (ecx) {
   36.86 +    switch ( ecx )
   36.87 +    {
   36.88      case MSR_IA32_TIME_STAMP_COUNTER:
   36.89          msr_content = hvm_get_guest_time(v);
   36.90          break;
   36.91 @@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct
   36.92      case MSR_IA32_APICBASE:
   36.93          msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   36.94          break;
   36.95 +    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
   36.96 +        goto gp_fault;
   36.97      default:
   36.98          if ( long_mode_do_msr_read(regs) )
   36.99              goto done;
  36.100 @@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct
  36.101              regs->edx = edx;
  36.102              goto done;
  36.103          }
  36.104 -        vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
  36.105 -        return 0;
  36.106 +
  36.107 +        goto gp_fault;
  36.108      }
  36.109  
  36.110      regs->eax = msr_content & 0xFFFFFFFF;
  36.111 @@ -2589,6 +2607,10 @@ done:
  36.112                  ecx, (unsigned long)regs->eax,
  36.113                  (unsigned long)regs->edx);
  36.114      return 1;
  36.115 +
  36.116 +gp_fault:
  36.117 +    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
  36.118 +    return 0;
  36.119  }
  36.120  
  36.121  static int vmx_alloc_vlapic_mapping(struct domain *d)
  36.122 @@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc
  36.123      msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
  36.124      HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
  36.125  
  36.126 -    switch (ecx) {
  36.127 +    switch ( ecx )
  36.128 +    {
  36.129      case MSR_IA32_TIME_STAMP_COUNTER:
  36.130          hvm_set_guest_time(v, msr_content);
  36.131          pt_reset(v);
  36.132 @@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc
  36.133      case MSR_IA32_APICBASE:
  36.134          vlapic_msr_set(vcpu_vlapic(v), msr_content);
  36.135          break;
  36.136 +    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
  36.137 +        goto gp_fault;
  36.138      default:
  36.139          if ( !long_mode_do_msr_write(regs) )
  36.140              wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
  36.141 @@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc
  36.142      }
  36.143  
  36.144      return 1;
  36.145 +
  36.146 +gp_fault:
  36.147 +    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
  36.148 +    return 0;
  36.149  }
  36.150  
  36.151  static void vmx_do_hlt(void)
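
The new MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1 labels in both MSR handlers use GCC's case-range extension, so a guest touching any VMX capability MSR takes #GP: nested VMX is not offered, and exposing the host's values would only mislead guests. A minimal standalone illustration of the extension (values here are arbitrary):

    /* "case LOW ... HIGH:" matches every value in the inclusive range;
     * keep spaces around "..." so integer labels parse correctly. */
    static int msr_is_refused(unsigned int msr)
    {
        switch ( msr )
        {
        case 0x480 ... 0x48b:   /* illustrative range */
            return 1;           /* the real code injects #GP */
        default:
            return 0;
        }
    }
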
    37.1 --- a/xen/arch/x86/hvm/vpic.c	Wed Jun 20 12:47:52 2007 -0600
    37.2 +++ b/xen/arch/x86/hvm/vpic.c	Wed Jun 20 12:49:27 2007 -0600
    37.3 @@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
    37.4          vpic_update_int_output(vpic);
    37.5  }
    37.6  
    37.7 -int cpu_get_pic_interrupt(struct vcpu *v, int *type)
    37.8 +int cpu_get_pic_interrupt(struct vcpu *v)
    37.9  {
   37.10      int irq, vector;
   37.11      struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
   37.12 @@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
   37.13          return -1;
   37.14  
   37.15      vector = vpic[irq >> 3].irq_base + (irq & 7);
   37.16 -    *type = APIC_DM_EXTINT;
   37.17      return vector;
   37.18  }
    38.1 --- a/xen/arch/x86/hvm/vpt.c	Wed Jun 20 12:47:52 2007 -0600
    38.2 +++ b/xen/arch/x86/hvm/vpt.c	Wed Jun 20 12:49:27 2007 -0600
    38.3 @@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
    38.4      }
    38.5  }
    38.6  
    38.7 -static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
    38.8 +static struct periodic_time *is_pt_irq(
    38.9 +    struct vcpu *v, int vector, enum hvm_intack src)
   38.10  {
   38.11      struct list_head *head = &v->arch.hvm_vcpu.tm_list;
   38.12      struct periodic_time *pt;
   38.13 @@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
   38.14              return pt;
   38.15          }
   38.16  
   38.17 -        vec = get_isa_irq_vector(v, pt->irq, type);
   38.18 +        vec = get_isa_irq_vector(v, pt->irq, src);
   38.19  
   38.20          /* RTC irq need special care */
   38.21          if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
   38.22 @@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
   38.23      return NULL;
   38.24  }
   38.25  
   38.26 -void pt_intr_post(struct vcpu *v, int vector, int type)
   38.27 +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
   38.28  {
   38.29      struct periodic_time *pt;
   38.30      time_cb *cb;
   38.31 @@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve
   38.32  
   38.33      spin_lock(&v->arch.hvm_vcpu.tm_lock);
   38.34  
   38.35 -    pt = is_pt_irq(v, vector, type);
   38.36 +    pt = is_pt_irq(v, vector, src);
   38.37      if ( pt == NULL )
   38.38      {
   38.39          spin_unlock(&v->arch.hvm_vcpu.tm_lock);
   38.40 @@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v)
   38.41  
   38.42      list_for_each_entry ( pt, head, list )
   38.43      {
   38.44 -        if ( pt->enabled )
   38.45 -        {
   38.46 -            pt->pending_intr_nr = 0;
   38.47 -            pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
   38.48 -            pt->scheduled = NOW() + pt->period;
   38.49 -            set_timer(&pt->timer, pt->scheduled);
   38.50 -        }
   38.51 +        pt->pending_intr_nr = 0;
   38.52 +        pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
   38.53 +        pt->scheduled = NOW() + pt->period;
   38.54 +        set_timer(&pt->timer, pt->scheduled);
   38.55      }
   38.56  
   38.57      spin_unlock(&v->arch.hvm_vcpu.tm_lock);
   38.58 @@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v)
   38.59      spin_lock(&v->arch.hvm_vcpu.tm_lock);
   38.60  
   38.61      list_for_each_entry ( pt, head, list )
   38.62 -    {
   38.63 -        if ( pt->enabled )
   38.64 -            migrate_timer(&pt->timer, v->processor);
   38.65 -    }
   38.66 +        migrate_timer(&pt->timer, v->processor);
   38.67  
   38.68      spin_unlock(&v->arch.hvm_vcpu.tm_lock);
   38.69  }
   38.70 @@ -263,8 +258,9 @@ void create_periodic_time(
   38.71  
   38.72      spin_lock(&v->arch.hvm_vcpu.tm_lock);
   38.73  
   38.74 -    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
   38.75      pt->enabled = 1;
   38.76 +    pt->pending_intr_nr = 0;
   38.77 +
   38.78      if ( period < 900000 ) /* < 0.9 ms */
   38.79      {
   38.80          gdprintk(XENLOG_WARNING,
   38.81 @@ -283,6 +279,8 @@ void create_periodic_time(
   38.82      pt->priv = data;
   38.83  
   38.84      list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
   38.85 +
   38.86 +    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
   38.87      set_timer(&pt->timer, pt->scheduled);
   38.88  
   38.89      spin_unlock(&v->arch.hvm_vcpu.tm_lock);
   38.90 @@ -295,8 +293,12 @@ void destroy_periodic_time(struct period
   38.91  
   38.92      pt_lock(pt);
   38.93      pt->enabled = 0;
   38.94 -    pt->pending_intr_nr = 0;
   38.95      list_del(&pt->list);
   38.96 +    pt_unlock(pt);
   38.97 +
   38.98 +    /*
    38.99 +     * pt_timer_fn() may still be running until this kill_timer() returns.
   38.100 +     * We must call it outside pt_lock(), or we can deadlock with pt_timer_fn().
  38.101 +     */
  38.102      kill_timer(&pt->timer);
  38.103 -    pt_unlock(pt);
  38.104  }
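
The reordering in destroy_periodic_time() matters because kill_timer() waits synchronously for a running handler, and pt_timer_fn() takes the same per-vcpu tm_lock. A sketch of the lock wrapper and the deadlock the move avoids (pt_lock() is assumed to take tm_lock, consistent with the spin_lock(&v->arch.hvm_vcpu.tm_lock) calls above, and simplified here):

    /* Sketch (simplified): lock the owning vcpu's timer list.
     * Deadlock avoided by the reordering:
     *   CPU0: destroy_periodic_time()     CPU1: pt_timer_fn()
     *         pt_lock(pt)                       spin_lock(tm_lock) <- spins
     *         kill_timer() <- waits for CPU1's handler to complete
     */
    static void pt_lock(struct periodic_time *pt)
    {
        spin_lock(&pt->vcpu->arch.hvm_vcpu.tm_lock);
    }
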
    39.1 --- a/xen/arch/x86/mm.c	Wed Jun 20 12:47:52 2007 -0600
    39.2 +++ b/xen/arch/x86/mm.c	Wed Jun 20 12:49:27 2007 -0600
    39.3 @@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) 
    39.4      if ( entries > FIRST_RESERVED_GDT_ENTRY )
    39.5          return -EINVAL;
    39.6      
    39.7 -    if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
    39.8 +    if ( copy_from_guest(frames, frame_list, nr_pages) )
    39.9          return -EFAULT;
   39.10  
   39.11      LOCK_BIGLOCK(current->domain);
   39.12 @@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
   39.13          else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
   39.14              return -ESRCH;
   39.15  
   39.16 -        rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
   39.17 +        rc = copy_from_guest(d->arch.e820, fmap.map.buffer,
   39.18                               fmap.map.nr_entries) ? -EFAULT : 0;
   39.19          d->arch.nr_e820 = fmap.map.nr_entries;
   39.20  
   39.21 @@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
   39.22              return -EFAULT;
   39.23  
   39.24          map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
   39.25 -        if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
   39.26 +        if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) ||
   39.27               copy_to_guest(arg, &map, 1) )
   39.28              return -EFAULT;
   39.29  
   39.30 @@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
   39.31          buffer = guest_handle_cast(memmap.buffer, e820entry_t);
   39.32  
   39.33          count = min((unsigned int)e820.nr_map, memmap.nr_entries);
   39.34 -        if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
   39.35 +        if ( copy_to_guest(buffer, e820.map, count) < 0 )
   39.36              return -EFAULT;
   39.37  
   39.38          memmap.nr_entries = count;
   39.39 @@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
   39.40  
   39.41      case XENMEM_machphys_mapping:
   39.42      {
   39.43 -        struct xen_machphys_mapping mapping = {
   39.44 +        static const struct xen_machphys_mapping mapping = {
   39.45              .v_start = MACH2PHYS_VIRT_START,
   39.46              .v_end   = MACH2PHYS_VIRT_END,
   39.47              .max_mfn = MACH2PHYS_NR_ENTRIES - 1
    40.1 --- a/xen/arch/x86/setup.c	Wed Jun 20 12:47:52 2007 -0600
    40.2 +++ b/xen/arch/x86/setup.c	Wed Jun 20 12:49:27 2007 -0600
    40.3 @@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8
    40.4  /* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
    40.5  static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
    40.6  {
    40.7 -    unsigned long rs, re;
    40.8 +    uint64_t rs, re;
    40.9      int i;
   40.10  
   40.11      for ( i = 0; i < boot_e820.nr_map; i++ )
   40.12      {
   40.13          /* Have we found the e820 region that includes the specified range? */
   40.14          rs = boot_e820.map[i].addr;
   40.15 -        re = boot_e820.map[i].addr + boot_e820.map[i].size;
   40.16 +        re = rs + boot_e820.map[i].size;
   40.17          if ( (s < rs) || (e > re) )
   40.18              continue;
   40.19  
   40.20 @@ -402,7 +402,7 @@ void init_done(void)
   40.21      startup_cpu_idle_loop();
   40.22  }
   40.23  
   40.24 -void __init __start_xen(multiboot_info_t *mbi)
   40.25 +void __init __start_xen(unsigned long mbi_p)
   40.26  {
   40.27      char *memmap_type = NULL;
   40.28      char __cmdline[] = "", *cmdline = __cmdline;
   40.29 @@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t
   40.30      unsigned int initrdidx = 1;
   40.31      char *_policy_start = NULL;
   40.32      unsigned long _policy_len = 0;
   40.33 +    multiboot_info_t *mbi = __va(mbi_p);
   40.34      module_t *mod = (module_t *)__va(mbi->mods_addr);
   40.35      unsigned long nr_pages, modules_length;
   40.36      int i, e820_warn = 0, bytes = 0;
   40.37 @@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t
   40.38              barrier();
   40.39              move_memory(e, 0, __pa(&_end) - xen_phys_start);
   40.40  
   40.41 +            /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
   40.42 +            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
   40.43 +
   40.44              /* Walk initial pagetables, relocating page directory entries. */
   40.45              pl4e = __va(__pa(idle_pg_table));
   40.46              for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
    41.1 --- a/xen/arch/x86/traps.c	Wed Jun 20 12:47:52 2007 -0600
    41.2 +++ b/xen/arch/x86/traps.c	Wed Jun 20 12:49:27 2007 -0600
    41.3 @@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs(
    41.4      if ( idx > 0 )
    41.5          return 0;
    41.6  
    41.7 -    *eax = *edx = 0;
    41.8 +    switch ( idx )
    41.9 +    {
   41.10 +    case 0:
   41.11 +    {
   41.12 +        *eax = *edx = 0;
   41.13 +        break;
   41.14 +    }
   41.15 +    default:
   41.16 +        BUG();
   41.17 +    }
   41.18 +
   41.19      return 1;
   41.20  }
   41.21  
   41.22 @@ -1130,7 +1140,7 @@ static inline int guest_io_okay(
   41.23           * read as 0xff (no access allowed).
   41.24           */
   41.25          TOGGLE_MODE();
   41.26 -        switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
   41.27 +        switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
   41.28                                            port>>3, 2) )
   41.29          {
   41.30          default: x.bytes[0] = ~0;
    42.1 --- a/xen/arch/x86/x86_32/traps.c	Wed Jun 20 12:47:52 2007 -0600
    42.2 +++ b/xen/arch/x86/x86_32/traps.c	Wed Jun 20 12:49:27 2007 -0600
    42.3 @@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri
    42.4  
    42.5  void hypercall_page_initialise(struct domain *d, void *hypercall_page)
    42.6  {
    42.7 +    memset(hypercall_page, 0xCC, PAGE_SIZE);
    42.8      if ( is_hvm_domain(d) )
    42.9          hvm_hypercall_page_initialise(d, hypercall_page);
   42.10      else if ( supervisor_mode_kernel )
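
The new memset fills the hypercall page with 0xCC -- the one-byte x86 INT3 opcode -- before the mode-specific initialisers write real stubs, so a call into any slot left unpopulated traps with #BP instead of executing stale bytes. A standalone sketch of the pattern (names are illustrative, not Xen's):

    #include <string.h>

    #define SLOT_SIZE 32            /* one 32-byte stub per hypercall */

    static void init_stub_page(unsigned char *page, unsigned long size)
    {
        memset(page, 0xCC, size);   /* poison: INT3 in every byte */
        page[0 * SLOT_SIZE] = 0xC3; /* demo: populate slot 0 with RET */
    }
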
    43.1 --- a/xen/arch/x86/x86_64/compat_kexec.S	Wed Jun 20 12:47:52 2007 -0600
    43.2 +++ b/xen/arch/x86/x86_64/compat_kexec.S	Wed Jun 20 12:49:27 2007 -0600
    43.3 @@ -2,13 +2,32 @@
    43.4   * Compatibility kexec handler.
    43.5   */
    43.6  
    43.7 +/*
    43.8 + * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
     43.9 + * currently true, but if that ever changes then compat_pg_table will
   43.10 + * need to be moved back below 4G at run time.
   43.11 + */
   43.12 +
   43.13  #include <xen/config.h>
   43.14  
   43.15  #include <asm/asm_defns.h>
   43.16  #include <asm/msr.h>
   43.17  #include <asm/page.h>
   43.18  
   43.19 -#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
   43.20 +/* The unrelocated physical address of a symbol. */
   43.21 +#define SYM_PHYS(sym)          ((sym) - __XEN_VIRT_START)
   43.22 +
   43.23 +/* Load physical address of symbol into register and relocate it. */
   43.24 +#define RELOCATE_SYM(sym,reg)  mov $SYM_PHYS(sym), reg ; \
   43.25 +                               add xen_phys_start(%rip), reg
   43.26 +
   43.27 +/*
   43.28 + * Relocate a physical address in memory. Size of temporary register
   43.29 + * determines size of the value to relocate.
   43.30 + */
   43.31 +#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
   43.32 +                               add xen_phys_start(%rip), reg ; \
   43.33 +                               mov reg, addr(%rip)
   43.34  
   43.35          .text
   43.36  
   43.37 @@ -31,21 +50,36 @@ 1:      dec %r9
   43.38          test %r9,%r9
   43.39          jnz 1b
   43.40  
   43.41 -        mov $SYM_PHYS(compat_page_list),%rdx
   43.42 +        RELOCATE_SYM(compat_page_list,%rdx)
   43.43 +
   43.44 +        /* Relocate compatibility mode entry point address. */
   43.45 +        RELOCATE_MEM(compatibility_mode_far,%eax)
   43.46 +
   43.47 +        /* Relocate compat_pg_table. */
   43.48 +        RELOCATE_MEM(compat_pg_table,     %rax)
   43.49 +        RELOCATE_MEM(compat_pg_table+0x8, %rax)
   43.50 +        RELOCATE_MEM(compat_pg_table+0x10,%rax)
   43.51 +        RELOCATE_MEM(compat_pg_table+0x18,%rax)
   43.52  
   43.53          /*
   43.54           * Setup an identity mapped region in PML4[0] of idle page
   43.55           * table.
   43.56           */
   43.57 -        lea l3_identmap(%rip),%rax
   43.58 -        sub %rbx,%rax
   43.59 +        RELOCATE_SYM(l3_identmap,%rax)
   43.60          or  $0x63,%rax
   43.61          mov %rax, idle_pg_table(%rip)
   43.62  
   43.63          /* Switch to idle page table. */
   43.64 -        movq $SYM_PHYS(idle_pg_table), %rax
   43.65 +        RELOCATE_SYM(idle_pg_table,%rax)
   43.66          movq %rax, %cr3
   43.67  
   43.68 +        /* Switch to identity mapped compatibility stack. */
   43.69 +        RELOCATE_SYM(compat_stack,%rax)
   43.70 +        movq %rax, %rsp
   43.71 +
   43.72 +        /* Save xen_phys_start for 32 bit code. */
   43.73 +        movq xen_phys_start(%rip), %rbx
   43.74 +
   43.75          /* Jump to low identity mapping in compatibility mode. */
   43.76          ljmp *compatibility_mode_far(%rip)
   43.77          ud2
   43.78 @@ -54,8 +88,27 @@ compatibility_mode_far:
   43.79          .long SYM_PHYS(compatibility_mode)
   43.80          .long __HYPERVISOR_CS32
   43.81  
   43.82 +        /*
   43.83 +         * We use 5 words of stack for the arguments passed to the kernel. The
   43.84 +         * kernel only uses 1 word before switching to its own stack. Allocate
   43.85 +         * 16 words to give "plenty" of room.
   43.86 +         */
   43.87 +        .fill 16,4,0
   43.88 +compat_stack:
   43.89 +
   43.90          .code32
   43.91  
   43.92 +#undef RELOCATE_SYM
   43.93 +#undef RELOCATE_MEM
   43.94 +
   43.95 +/*
   43.96 + * Load physical address of symbol into register and relocate it. %rbx
   43.97 + * contains xen_phys_start(%rip) saved before jump to compatibility
   43.98 + * mode.
   43.99 + */
  43.100 +#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
  43.101 +                              add %ebx, reg
  43.102 +
  43.103  compatibility_mode:
  43.104          /* Setup some sane segments. */
  43.105          movl $__HYPERVISOR_DS32, %eax
  43.106 @@ -78,7 +131,7 @@ compatibility_mode:
  43.107          movl %eax, %cr0
  43.108  
  43.109          /* Switch to 32 bit page table. */
  43.110 -        movl  $SYM_PHYS(compat_pg_table), %eax
  43.111 +        RELOCATE_SYM(compat_pg_table, %eax)
  43.112          movl  %eax, %cr3
  43.113  
  43.114          /* Clear MSR_EFER[LME], disabling long mode */
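
For reference, the relocation macros above expand mechanically; the 64-bit RELOCATE_SYM defined at the top of the file turns RELOCATE_SYM(idle_pg_table,%rax), for example, into the two instructions below -- the link-time physical address plus the boot-time relocation offset read from xen_phys_start:

        mov $SYM_PHYS(idle_pg_table), %rax    /* link-time physical address */
        add xen_phys_start(%rip), %rax        /* + runtime relocation base */
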
    44.1 --- a/xen/arch/x86/x86_64/traps.c	Wed Jun 20 12:47:52 2007 -0600
    44.2 +++ b/xen/arch/x86/x86_64/traps.c	Wed Jun 20 12:49:27 2007 -0600
    44.3 @@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri
    44.4  
    44.5  void hypercall_page_initialise(struct domain *d, void *hypercall_page)
    44.6  {
    44.7 +    memset(hypercall_page, 0xCC, PAGE_SIZE);
    44.8      if ( is_hvm_domain(d) )
    44.9          hvm_hypercall_page_initialise(d, hypercall_page);
   44.10      else if ( !is_pv_32bit_domain(d) )
    45.1 --- a/xen/common/compat/memory.c	Wed Jun 20 12:47:52 2007 -0600
    45.2 +++ b/xen/common/compat/memory.c	Wed Jun 20 12:49:27 2007 -0600
    45.3 @@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X
    45.4                      compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
    45.5  
    45.6                      BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
    45.7 -                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
    45.8 +                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start,
    45.9 +                                                 start_extent, &pfn, 1) )
   45.10                      {
   45.11                          if ( split >= 0 )
   45.12                          {
   45.13 @@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X
   45.14                          break;
   45.15                      }
   45.16                  }
   45.17 +
   45.18 +                /* Bail if there was an error. */
   45.19 +                if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) )
   45.20 +                    split = 0;
   45.21              }
   45.22              else
   45.23                  start_extent = end_extent;
    46.1 --- a/xen/common/domctl.c	Wed Jun 20 12:47:52 2007 -0600
    46.2 +++ b/xen/common/domctl.c	Wed Jun 20 12:49:27 2007 -0600
    46.3 @@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap(
    46.4  
    46.5      bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);
    46.6  
    46.7 -    copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);
    46.8 +    copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes);
    46.9  
   46.10      for ( i = copy_bytes; i < guest_bytes; i++ )
   46.11          copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
   46.12 @@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask(
   46.13      if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
   46.14          return;
   46.15  
   46.16 -    copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);
   46.17 +    copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes);
   46.18  
   46.19      bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
   46.20  }
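
Editor's note: for context, these helpers marshal a long-based CPU bitmap
to and from a byte array, so the tool interface sees the same layout
regardless of the host's sizeof(long). A hedged sketch of the
long-to-byte direction, least significant byte first (illustrative only;
Xen's bitmap_long_to_byte() may differ in detail):

    /* Copy nbits of a long-array bitmap into a byte array, least
     * significant byte of each long first (illustrative). */
    static void longs_to_bytes(unsigned char *bp, const unsigned long *lp,
                               int nbits)
    {
        int i, j, b;

        for ( i = 0, b = 0; nbits > 0; i++, b += (int)sizeof(unsigned long) )
        {
            unsigned long l = lp[i];

            for ( j = 0; j < (int)sizeof(unsigned long) && nbits > 0; j++ )
            {
                bp[b + j] = (unsigned char)(l & 0xff);
                l >>= 8;
                nbits -= 8;
            }
        }
    }
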
    47.1 --- a/xen/common/grant_table.c	Wed Jun 20 12:47:52 2007 -0600
    47.2 +++ b/xen/common/grant_table.c	Wed Jun 20 12:49:27 2007 -0600
    47.3 @@ -148,7 +148,7 @@ get_maptrack_handle(
    47.4                  return -1;
    47.5              }
    47.6  
    47.7 -            memset(new_mt, 0, PAGE_SIZE);
    47.8 +            clear_page(new_mt);
    47.9  
   47.10              new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
   47.11  
   47.12 @@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi
   47.13      {
   47.14          if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
   47.15              goto active_alloc_failed;
   47.16 -        memset(gt->active[i], 0, PAGE_SIZE);
   47.17 +        clear_page(gt->active[i]);
   47.18      }
   47.19  
   47.20      /* Shared */
   47.21 @@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi
   47.22      {
   47.23          if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
   47.24              goto shared_alloc_failed;
   47.25 -        memset(gt->shared[i], 0, PAGE_SIZE);
   47.26 +        clear_page(gt->shared[i]);
   47.27      }
   47.28  
   47.29      /* Share the new shared frames with the recipient domain */
   47.30 @@ -1365,7 +1365,7 @@ grant_table_create(
   47.31      {
   47.32          if ( (t->active[i] = alloc_xenheap_page()) == NULL )
   47.33              goto no_mem_2;
   47.34 -        memset(t->active[i], 0, PAGE_SIZE);
   47.35 +        clear_page(t->active[i]);
   47.36      }
   47.37  
   47.38      /* Tracking of mapped foreign frames table */
   47.39 @@ -1375,7 +1375,7 @@ grant_table_create(
   47.40      memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0]));
   47.41      if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
   47.42          goto no_mem_3;
   47.43 -    memset(t->maptrack[0], 0, PAGE_SIZE);
   47.44 +    clear_page(t->maptrack[0]);
   47.45      t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping);
   47.46      for ( i = 0; i < t->maptrack_limit; i++ )
   47.47          t->maptrack[0][i].ref = i+1;
   47.48 @@ -1389,7 +1389,7 @@ grant_table_create(
   47.49      {
   47.50          if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
   47.51              goto no_mem_4;
   47.52 -        memset(t->shared[i], 0, PAGE_SIZE);
   47.53 +        clear_page(t->shared[i]);
   47.54      }
   47.55  
   47.56      for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
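
Editor's note: replacing memset(p, 0, PAGE_SIZE) with clear_page(p) lets
each architecture supply an optimised page-zeroing routine. A portable
fallback would be nothing more than the memset it replaces (a sketch;
Xen's actual clear_page() is architecture-specific):

    #include <string.h>

    #define PAGE_SIZE 4096            /* assumption: 4KiB pages */

    /* Portable stand-in for clear_page(); real implementations may
     * use wider or cache-aware stores. */
    static inline void clear_page(void *page)
    {
        memset(page, 0, PAGE_SIZE);
    }
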
    48.1 --- a/xen/common/kernel.c	Wed Jun 20 12:47:52 2007 -0600
    48.2 +++ b/xen/common/kernel.c	Wed Jun 20 12:49:27 2007 -0600
    48.3 @@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
    48.4      {
    48.5          xen_extraversion_t extraversion;
    48.6          safe_strcpy(extraversion, xen_extra_version());
    48.7 -        if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) )
    48.8 +        if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
    48.9              return -EFAULT;
   48.10          return 0;
   48.11      }
   48.12 @@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
   48.13          memset(info, 0, sizeof(info));
   48.14          arch_get_xen_caps(&info);
   48.15  
   48.16 -        if ( copy_to_guest(arg, (char *)info, sizeof(info)) )
   48.17 +        if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) )
   48.18              return -EFAULT;
   48.19          return 0;
   48.20      }
   48.21 @@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
   48.22      {
   48.23          xen_changeset_info_t chgset;
   48.24          safe_strcpy(chgset, xen_changeset());
   48.25 -        if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) )
   48.26 +        if ( copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) )
   48.27              return -EFAULT;
   48.28          return 0;
   48.29      }
   48.30 @@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
   48.31  
   48.32      case XENVER_guest_handle:
   48.33      {
   48.34 -        if ( copy_to_guest(arg, (char *)current->domain->handle,
   48.35 -                           sizeof(current->domain->handle)) )
   48.36 +        if ( copy_to_guest(arg, current->domain->handle,
   48.37 +                           ARRAY_SIZE(current->domain->handle)) )
   48.38              return -EFAULT;
   48.39          return 0;
   48.40      }    
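
Editor's note: the (char *) casts go away because the reworked
copy_to_guest() macros type-check the handle against the source pointer,
and the count argument is now in array elements rather than bytes, hence
ARRAY_SIZE(). For reference, the conventional definition (as used in Xen
and Linux):

    /* Number of elements in a statically sized array; for the char
     * arrays above this equals sizeof, but the element-count form
     * stays correct if the element type ever changes. */
    #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
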
    49.1 --- a/xen/common/kexec.c	Wed Jun 20 12:47:52 2007 -0600
    49.2 +++ b/xen/common/kexec.c	Wed Jun 20 12:49:27 2007 -0600
    49.3 @@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_
    49.4  
    49.5  static int kexec_get(xen)(xen_kexec_range_t *range)
    49.6  {
    49.7 +#ifdef CONFIG_X86_64
    49.8 +    range->start = xenheap_phys_start;
    49.9 +#else
   49.10      range->start = virt_to_maddr(_start);
   49.11 +#endif
   49.12      range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
   49.13      return 0;
   49.14  }
    50.1 --- a/xen/common/perfc.c	Wed Jun 20 12:47:52 2007 -0600
    50.2 +++ b/xen/common/perfc.c	Wed Jun 20 12:49:27 2007 -0600
    50.3 @@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
    50.4      }
    50.5      BUG_ON(v != perfc_nbr_vals);
    50.6  
    50.7 -    if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) )
    50.8 +    if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) )
    50.9          return -EFAULT;
   50.10      if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) )
   50.11          return -EFAULT;
    51.1 --- a/xen/drivers/char/console.c	Wed Jun 20 12:47:52 2007 -0600
    51.2 +++ b/xen/drivers/char/console.c	Wed Jun 20 12:49:27 2007 -0600
    51.3 @@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES
    51.4                  CONSOLEIO_write, count, buffer);
    51.5  
    51.6          kcount = min_t(int, count, sizeof(kbuf)-1);
    51.7 -        if ( copy_from_guest((char *)kbuf, buffer, kcount) )
    51.8 +        if ( copy_from_guest(kbuf, buffer, kcount) )
    51.9              return -EFAULT;
   51.10          kbuf[kcount] = '\0';
   51.11  
    52.1 --- a/xen/drivers/video/vga.c	Wed Jun 20 12:47:52 2007 -0600
    52.2 +++ b/xen/drivers/video/vga.c	Wed Jun 20 12:49:27 2007 -0600
    52.3 @@ -33,6 +33,9 @@ static unsigned char *video;
    52.4   *   'vga=ask':
    52.5   *      display a vga menu of available modes
    52.6   * 
    52.7 + *   'vga=current':
    52.8 + *      use the current vga mode without modification
    52.9 + * 
   52.10   *   'vga=text-80x<rows>':
   52.11   *      text mode, where <rows> is one of {25,28,30,34,43,50,60}
   52.12   * 
    53.1 --- a/xen/include/asm-ia64/guest_access.h	Wed Jun 20 12:47:52 2007 -0600
    53.2 +++ b/xen/include/asm-ia64/guest_access.h	Wed Jun 20 12:49:27 2007 -0600
    53.3 @@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void *
    53.4      __copy_field_from_guest(ptr, hnd, field)
    53.5  
    53.6  #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                    \
    53.7 -    const typeof(ptr) _d = (hnd).p;                                     \
    53.8 -    const typeof(ptr) _s = (ptr);                                       \
    53.9 +    const typeof(*(ptr)) *_s = (ptr);                                   \
   53.10 +    void *_d = (hnd).p;                                                 \
   53.11 +    ((void)((hnd).p == (ptr)));                                         \
   53.12      xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
   53.13  })
   53.14  
   53.15  #define __copy_field_to_guest(hnd, ptr, field) ({                   \
   53.16 -    const int _off = offsetof(typeof(*ptr), field);                 \
   53.17 -    const typeof(ptr) _d = (hnd).p;                                 \
   53.18 +    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
   53.19      const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
   53.20 +    void *_d = (hnd).p;                                             \
   53.21 +    ((void)(&(hnd).p->field == &(ptr)->field));                     \
   53.22      xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
   53.23  })
   53.24  
   53.25 -#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                     \
   53.26 -    const typeof(ptr) _s = (hnd).p;                                        \
   53.27 -    const typeof(ptr) _d = (ptr);                                          \
   53.28 -    xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx));  \
   53.29 +#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                    \
   53.30 +    const typeof(*(ptr)) *_s = (hnd).p;                                   \
   53.31 +    typeof(*(ptr)) *_d = (ptr);                                           \
   53.32 +    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
   53.33  })
   53.34  
   53.35  #define __copy_field_from_guest(ptr, hnd, field) ({                 \
   53.36 -    const int _off = offsetof(typeof(*ptr), field);                 \
   53.37 -    const typeof(ptr) _s = (hnd).p;                                 \
   53.38 -    const typeof(&(ptr)->field) _d = &(ptr)->field;                 \
   53.39 +    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
   53.40 +    const void *_s = (hnd).p;                                       \
   53.41 +    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
   53.42 +    ((void)(&(hnd).p->field == &(ptr)->field));                     \
   53.43      xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
   53.44  })
   53.45  
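
Editor's note: the new ((void)((hnd).p == (ptr))) lines are pure
compile-time checks. Comparing pointers of incompatible types draws a
compiler diagnostic, while the discarded comparison generates no code.
The idiom in isolation (names illustrative):

    /* Warn at compile time if a and b are not compatible pointer
     * types; no code is generated for the discarded comparison. */
    #define CHECK_SAME_TYPE(a, b) ((void)((a) == (b)))

    void example(int *a, int *b, long *l)
    {
        CHECK_SAME_TYPE(a, b);        /* same type: compiles silently */
        /* CHECK_SAME_TYPE(a, l); */  /* distinct types: would warn   */
        (void)l;
    }
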
    54.1 --- a/xen/include/asm-x86/event.h	Wed Jun 20 12:47:52 2007 -0600
    54.2 +++ b/xen/include/asm-x86/event.h	Wed Jun 20 12:49:27 2007 -0600
    54.3 @@ -10,7 +10,6 @@
    54.4  #define __ASM_EVENT_H__
    54.5  
    54.6  #include <xen/shared.h>
    54.7 -#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */
    54.8  
    54.9  static inline void vcpu_kick(struct vcpu *v)
   54.10  {
    55.1 --- a/xen/include/asm-x86/guest_access.h	Wed Jun 20 12:47:52 2007 -0600
    55.2 +++ b/xen/include/asm-x86/guest_access.h	Wed Jun 20 12:49:27 2007 -0600
    55.3 @@ -32,11 +32,12 @@
    55.4   * specifying an offset into the guest array.
    55.5   */
    55.6  #define copy_to_guest_offset(hnd, off, ptr, nr) ({      \
    55.7 -    typeof(ptr) _x = (hnd).p;                           \
    55.8 -    const typeof(ptr) _y = (ptr);                       \
    55.9 +    const typeof(*(ptr)) *_s = (ptr);                   \
   55.10 +    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
   55.11 +    ((void)((hnd).p == (ptr)));                         \
   55.12      is_hvm_vcpu(current) ?                              \
   55.13 -    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
   55.14 -    copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));       \
   55.15 +    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
   55.16 +    copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));       \
   55.17  })
   55.18  
   55.19  /*
   55.20 @@ -44,29 +45,30 @@
   55.21   * specifying an offset into the guest array.
   55.22   */
   55.23  #define copy_from_guest_offset(ptr, hnd, off, nr) ({    \
   55.24 -    const typeof(ptr) _x = (hnd).p;                     \
   55.25 -    typeof(ptr) _y = (ptr);                             \
   55.26 +    const typeof(*(ptr)) *_s = (hnd).p;                 \
   55.27 +    typeof(*(ptr)) *_d = (ptr);                         \
   55.28      is_hvm_vcpu(current) ?                              \
   55.29 -    copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
   55.30 -    copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));     \
   55.31 +    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
   55.32 +    copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));     \
   55.33  })
   55.34  
   55.35  /* Copy sub-field of a structure to guest context via a guest handle. */
   55.36  #define copy_field_to_guest(hnd, ptr, field) ({         \
   55.37 -    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
   55.38 -    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
   55.39 +    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
   55.40 +    void *_d = &(hnd).p->field;                         \
   55.41 +    ((void)(&(hnd).p->field == &(ptr)->field));         \
   55.42      is_hvm_vcpu(current) ?                              \
   55.43 -    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
   55.44 -    copy_to_user(_x, _y, sizeof(*_x));                  \
   55.45 +    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
   55.46 +    copy_to_user(_d, _s, sizeof(*_s));                  \
   55.47  })
   55.48  
   55.49  /* Copy sub-field of a structure from guest context via a guest handle. */
   55.50  #define copy_field_from_guest(ptr, hnd, field) ({       \
   55.51 -    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
   55.52 -    typeof(&(ptr)->field) _y = &(ptr)->field;           \
   55.53 +    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
   55.54 +    typeof(&(ptr)->field) _d = &(ptr)->field;           \
   55.55      is_hvm_vcpu(current) ?                              \
   55.56 -    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
   55.57 -    copy_from_user(_y, _x, sizeof(*_x));                \
   55.58 +    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
   55.59 +    copy_from_user(_d, _s, sizeof(*_d));                \
   55.60  })
   55.61  
   55.62  /*
   55.63 @@ -78,35 +80,37 @@
   55.64       array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
   55.65  
   55.66  #define __copy_to_guest_offset(hnd, off, ptr, nr) ({    \
   55.67 -    typeof(ptr) _x = (hnd).p;                           \
   55.68 -    const typeof(ptr) _y = (ptr);                       \
   55.69 +    const typeof(*(ptr)) *_s = (ptr);                   \
   55.70 +    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
   55.71 +    ((void)((hnd).p == (ptr)));                         \
   55.72      is_hvm_vcpu(current) ?                              \
   55.73 -    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
   55.74 -    __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));     \
   55.75 +    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
   55.76 +    __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));     \
   55.77  })
   55.78  
   55.79  #define __copy_from_guest_offset(ptr, hnd, off, nr) ({  \
   55.80 -    const typeof(ptr) _x = (hnd).p;                     \
   55.81 -    typeof(ptr) _y = (ptr);                             \
   55.82 +    const typeof(*(ptr)) *_s = (hnd).p;                 \
   55.83 +    typeof(*(ptr)) *_d = (ptr);                         \
   55.84      is_hvm_vcpu(current) ?                              \
   55.85 -    copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
   55.86 -    __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));   \
   55.87 +    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
   55.88 +    __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));   \
   55.89  })
   55.90  
   55.91  #define __copy_field_to_guest(hnd, ptr, field) ({       \
   55.92 -    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
   55.93 -    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
   55.94 +    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
   55.95 +    void *_d = &(hnd).p->field;                         \
   55.96 +    ((void)(&(hnd).p->field == &(ptr)->field));         \
   55.97      is_hvm_vcpu(current) ?                              \
   55.98 -    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
   55.99 -    __copy_to_user(_x, _y, sizeof(*_x));                \
  55.100 +    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
  55.101 +    __copy_to_user(_d, _s, sizeof(*_s));                \
  55.102  })
  55.103  
  55.104  #define __copy_field_from_guest(ptr, hnd, field) ({     \
  55.105 -    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
  55.106 -    typeof(&(ptr)->field) _y = &(ptr)->field;           \
  55.107 +    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
  55.108 +    typeof(&(ptr)->field) _d = &(ptr)->field;           \
  55.109      is_hvm_vcpu(current) ?                              \
  55.110 -    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
  55.111 -    __copy_from_user(_y, _x, sizeof(*_x));              \
  55.112 +    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
  55.113 +    __copy_from_user(_d, _s, sizeof(*_d));              \
  55.114  })
  55.115  
  55.116  #endif /* __ASM_X86_GUEST_ACCESS_H__ */
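
Editor's note: the destination in the offset macros becomes
"char (*_d)[sizeof(*_s)]", a pointer to an array of sizeof(*_s) bytes.
_d + (off) therefore still advances by whole elements, while the
(void *) cast sidesteps qualifier mismatches and the separate
pointer-comparison line keeps the type check. A small demonstration of
the pointer arithmetic:

    #include <stdio.h>

    int main(void)
    {
        char buf[4 * sizeof(int)];
        char (*d)[sizeof(int)] = (void *)buf;

        /* d + 2 points 2 * sizeof(int) bytes into buf. */
        printf("%d\n", (int)((char *)(d + 2) - buf)); /* 8 if sizeof(int)==4 */
        return 0;
    }
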
    56.1 --- a/xen/include/asm-x86/hvm/hvm.h	Wed Jun 20 12:47:52 2007 -0600
    56.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Wed Jun 20 12:49:27 2007 -0600
    56.3 @@ -55,6 +55,14 @@ typedef struct segment_register {
    56.4      u64        base;
    56.5  } __attribute__ ((packed)) segment_register_t;
    56.6  
    56.7 +/* Interrupt acknowledgement sources. */
    56.8 +enum hvm_intack {
    56.9 +    hvm_intack_none,
   56.10 +    hvm_intack_pic,
   56.11 +    hvm_intack_lapic,
   56.12 +    hvm_intack_nmi
   56.13 +};
   56.14 +
   56.15  /*
   56.16   * The hardware virtual machine (HVM) interface abstracts away from the
   56.17   * x86/x86_64 CPU virtualization assist specifics. Currently this interface
   56.18 @@ -106,7 +114,7 @@ struct hvm_function_table {
   56.19      int (*long_mode_enabled)(struct vcpu *v);
   56.20      int (*pae_enabled)(struct vcpu *v);
   56.21      int (*nx_enabled)(struct vcpu *v);
   56.22 -    int (*interrupts_enabled)(struct vcpu *v);
   56.23 +    int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
   56.24      int (*guest_x86_mode)(struct vcpu *v);
   56.25      unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
   56.26      unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
   56.27 @@ -124,6 +132,13 @@ struct hvm_function_table {
   56.28      void (*update_guest_cr3)(struct vcpu *v);
   56.29  
   56.30      /*
    56.31 +     * Called to ensure that all guest-specific mappings in a tagged TLB
   56.32 +     * are flushed; does *not* flush Xen's TLB entries, and on
   56.33 +     * processors without a tagged TLB it will be a noop.
   56.34 +     */
   56.35 +    void (*flush_guest_tlbs)(void);
   56.36 +
   56.37 +    /*
   56.38       * Reflect the virtual APIC's value in the guest's V_TPR register
   56.39       */
   56.40      void (*update_vtpr)(struct vcpu *v, unsigned long value);
   56.41 @@ -148,6 +163,7 @@ struct hvm_function_table {
   56.42  };
   56.43  
   56.44  extern struct hvm_function_table hvm_funcs;
   56.45 +extern int hvm_enabled;
   56.46  
   56.47  int hvm_domain_initialise(struct domain *d);
   56.48  void hvm_domain_relinquish_resources(struct domain *d);
   56.49 @@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
   56.50  #define hvm_long_mode_enabled(v) (v,0)
   56.51  #endif
   56.52  
   56.53 - static inline int
   56.54 +static inline int
   56.55  hvm_pae_enabled(struct vcpu *v)
   56.56  {
   56.57      return hvm_funcs.pae_enabled(v);
   56.58  }
   56.59  
   56.60  static inline int
   56.61 -hvm_interrupts_enabled(struct vcpu *v)
   56.62 +hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
   56.63  {
   56.64 -    return hvm_funcs.interrupts_enabled(v);
   56.65 +    return hvm_funcs.interrupts_enabled(v, type);
   56.66  }
   56.67  
   56.68  static inline int
   56.69 @@ -231,6 +247,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned
   56.70  
   56.71  void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
   56.72  
   56.73 +static inline void 
   56.74 +hvm_flush_guest_tlbs(void)
   56.75 +{
   56.76 +    if ( hvm_enabled )
   56.77 +        hvm_funcs.flush_guest_tlbs();
   56.78 +}
   56.79 +
   56.80  void hvm_hypercall_page_initialise(struct domain *d,
   56.81                                     void *hypercall_page);
   56.82  
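
Editor's note: hvm_enabled moves here from asm-x86/hvm/support.h (see
below) so that the new hvm_flush_guest_tlbs() wrapper can be used from
generic code; it must degrade to a no-op when no HVM backend has
registered. A sketch of a plausible call site (local_flush_tlb() and the
pairing are assumptions, not taken from this patch):

    /* Declarations only, to keep the sketch self-contained. */
    void local_flush_tlb(void);       /* assumed arch-local TLB flush */
    void hvm_flush_guest_tlbs(void);

    static void flush_all_tlbs_sketch(void)
    {
        local_flush_tlb();        /* Xen's own TLB entries */
        hvm_flush_guest_tlbs();   /* guest ASID-tagged entries, if any */
    }
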
    57.1 --- a/xen/include/asm-x86/hvm/irq.h	Wed Jun 20 12:47:52 2007 -0600
    57.2 +++ b/xen/include/asm-x86/hvm/irq.h	Wed Jun 20 12:49:27 2007 -0600
    57.3 @@ -24,11 +24,11 @@
    57.4  
    57.5  #include <xen/types.h>
    57.6  #include <xen/spinlock.h>
    57.7 +#include <asm/hvm/hvm.h>
    57.8  #include <asm/hvm/vpic.h>
    57.9  #include <asm/hvm/vioapic.h>
   57.10  #include <public/hvm/save.h>
   57.11  
   57.12 -
   57.13  struct hvm_irq {
   57.14      /*
   57.15       * Virtual interrupt wires for a single PCI bus.
   57.16 @@ -58,7 +58,6 @@ struct hvm_irq {
   57.17              HVMIRQ_callback_gsi,
   57.18              HVMIRQ_callback_pci_intx
   57.19          } callback_via_type;
   57.20 -        uint32_t pad; /* So the next field will be aligned */
   57.21      };
   57.22      union {
   57.23          uint32_t gsi;
   57.24 @@ -115,9 +114,12 @@ void hvm_set_pci_link_route(struct domai
   57.25  void hvm_set_callback_irq_level(void);
   57.26  void hvm_set_callback_via(struct domain *d, uint64_t via);
   57.27  
   57.28 -int cpu_get_interrupt(struct vcpu *v, int *type);
   57.29 -int cpu_has_pending_irq(struct vcpu *v);
   57.30 -int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
   57.31 +/* Check/Acknowledge next pending interrupt. */
   57.32 +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
   57.33 +int hvm_vcpu_ack_pending_irq(
   57.34 +    struct vcpu *v, enum hvm_intack type, int *vector);
   57.35 +
   57.36 +int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
   57.37  int is_isa_irq_masked(struct vcpu *v, int isa_irq);
   57.38  
   57.39  #endif /* __ASM_X86_HVM_IRQ_H__ */
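
Editor's note: the old cpu_get_interrupt()/cpu_has_pending_irq() pair
returned an opaque integer "type"; the replacement threads enum
hvm_intack through an explicit check-then-acknowledge protocol. A hedged
sketch of a caller (deliver_interrupt() and the control flow are
illustrative, not this patch's code):

    struct vcpu;

    enum hvm_intack { hvm_intack_none, hvm_intack_pic,
                      hvm_intack_lapic, hvm_intack_nmi };

    enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
    int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type,
                                 int *vector);
    void deliver_interrupt(struct vcpu *v, enum hvm_intack type, int vector);

    static void try_deliver(struct vcpu *v)
    {
        int vector;
        enum hvm_intack type = hvm_vcpu_has_pending_irq(v);

        /* Check first, acknowledge second: the acknowledge step can
         * still refuse if the source is masked meanwhile. */
        if ( type != hvm_intack_none &&
             hvm_vcpu_ack_pending_irq(v, type, &vector) )
            deliver_interrupt(v, type, vector);
    }
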
    58.1 --- a/xen/include/asm-x86/hvm/support.h	Wed Jun 20 12:47:52 2007 -0600
    58.2 +++ b/xen/include/asm-x86/hvm/support.h	Wed Jun 20 12:49:27 2007 -0600
    58.3 @@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
    58.4  /* End of save/restore */
    58.5  
    58.6  extern char hvm_io_bitmap[];
    58.7 -extern int hvm_enabled;
    58.8  
    58.9  void hvm_enable(struct hvm_function_table *);
   58.10  void hvm_disable(void);
    59.1 --- a/xen/include/asm-x86/hvm/svm/asid.h	Wed Jun 20 12:47:52 2007 -0600
    59.2 +++ b/xen/include/asm-x86/hvm/svm/asid.h	Wed Jun 20 12:49:27 2007 -0600
    59.3 @@ -30,6 +30,7 @@
    59.4  void svm_asid_init(struct cpuinfo_x86 *c);
    59.5  void svm_asid_init_vcpu(struct vcpu *v);
    59.6  void svm_asid_inv_asid(struct vcpu *v);
    59.7 +void svm_asid_inc_generation(void);
    59.8  
    59.9  /*
   59.10   * ASID related, guest triggered events.
    60.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Wed Jun 20 12:47:52 2007 -0600
    60.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Wed Jun 20 12:49:27 2007 -0600
    60.3 @@ -30,12 +30,14 @@
    60.4  
    60.5  struct hvm_vcpu {
    60.6      unsigned long       hw_cr3;     /* value we give to HW to use */
    60.7 -    unsigned long       ioflags;
    60.8      struct hvm_io_op    io_op;
    60.9      struct vlapic       vlapic;
   60.10      s64                 cache_tsc_offset;
   60.11      u64                 guest_time;
   60.12  
   60.13 +    /* Is an NMI pending for delivery to this VCPU core? */
   60.14 +    bool_t              nmi_pending; /* NB. integrate flag with save/restore */
   60.15 +
   60.16      /* Lock and list for virtual platform timers. */
   60.17      spinlock_t          tm_lock;
   60.18      struct list_head    tm_list;
    61.1 --- a/xen/include/asm-x86/hvm/vlapic.h	Wed Jun 20 12:47:52 2007 -0600
    61.2 +++ b/xen/include/asm-x86/hvm/vlapic.h	Wed Jun 20 12:49:27 2007 -0600
    61.3 @@ -76,7 +76,7 @@ int vlapic_set_irq(struct vlapic *vlapic
    61.4  int vlapic_find_highest_irr(struct vlapic *vlapic);
    61.5  
    61.6  int vlapic_has_interrupt(struct vcpu *v);
    61.7 -int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
    61.8 +int cpu_get_apic_interrupt(struct vcpu *v);
    61.9  
   61.10  int  vlapic_init(struct vcpu *v);
   61.11  void vlapic_destroy(struct vcpu *v);
    62.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Wed Jun 20 12:47:52 2007 -0600
    62.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Wed Jun 20 12:49:27 2007 -0600
    62.3 @@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
    62.4                             instruction_len);
    62.5  }
    62.6  
    62.7 -static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
    62.8 +static inline void vmx_inject_extint(struct vcpu *v, int trap)
    62.9  {
   62.10 -    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
   62.11 +    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
   62.12 +                           VMX_DELIVER_NO_ERROR_CODE, 0);
   62.13 +}
   62.14 +
   62.15 +static inline void vmx_inject_nmi(struct vcpu *v)
   62.16 +{
   62.17 +    __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
   62.18 +                           VMX_DELIVER_NO_ERROR_CODE, 0);
   62.19  }
   62.20  
   62.21  #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
    63.1 --- a/xen/include/asm-x86/hvm/vpic.h	Wed Jun 20 12:47:52 2007 -0600
    63.2 +++ b/xen/include/asm-x86/hvm/vpic.h	Wed Jun 20 12:49:27 2007 -0600
    63.3 @@ -32,7 +32,7 @@
    63.4  void vpic_irq_positive_edge(struct domain *d, int irq);
    63.5  void vpic_irq_negative_edge(struct domain *d, int irq);
    63.6  void vpic_init(struct domain *d);
    63.7 -int cpu_get_pic_interrupt(struct vcpu *v, int *type);
    63.8 +int cpu_get_pic_interrupt(struct vcpu *v);
    63.9  int is_periodic_irq(struct vcpu *v, int irq, int type);
   63.10  
   63.11  #endif  /* __ASM_X86_HVM_VPIC_H__ */  
    64.1 --- a/xen/include/asm-x86/hvm/vpt.h	Wed Jun 20 12:47:52 2007 -0600
    64.2 +++ b/xen/include/asm-x86/hvm/vpt.h	Wed Jun 20 12:49:27 2007 -0600
    64.3 @@ -29,6 +29,7 @@
    64.4  #include <xen/timer.h>
    64.5  #include <xen/list.h>
    64.6  #include <asm/hvm/vpic.h>
    64.7 +#include <asm/hvm/irq.h>
    64.8  #include <public/hvm/save.h>
    64.9  
   64.10  struct HPETState;
   64.11 @@ -119,7 +120,7 @@ struct pl_time {    /* platform time */
   64.12  void pt_freeze_time(struct vcpu *v);
   64.13  void pt_thaw_time(struct vcpu *v);
   64.14  void pt_update_irq(struct vcpu *v);
   64.15 -void pt_intr_post(struct vcpu *v, int vector, int type);
   64.16 +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
   64.17  void pt_reset(struct vcpu *v);
   64.18  void pt_migrate(struct vcpu *v);
   64.19  void create_periodic_time(
    65.1 --- a/xen/include/xen/compat.h	Wed Jun 20 12:47:52 2007 -0600
    65.2 +++ b/xen/include/xen/compat.h	Wed Jun 20 12:49:27 2007 -0600
    65.3 @@ -44,9 +44,10 @@
    65.4   * specifying an offset into the guest array.
    65.5   */
    65.6  #define copy_to_compat_offset(hnd, off, ptr, nr) ({                  \
    65.7 -    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
    65.8 -    const typeof(*(ptr)) *const _y = (ptr);                          \
    65.9 -    copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));                \
   65.10 +    const typeof(*(ptr)) *_s = (ptr);                                \
   65.11 +    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
   65.12 +    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
   65.13 +    copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));                \
   65.14  })
   65.15  
   65.16  /*
   65.17 @@ -54,9 +55,9 @@
   65.18   * specifying an offset into the guest array.
   65.19   */
   65.20  #define copy_from_compat_offset(ptr, hnd, off, nr) ({                \
   65.21 -    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
   65.22 -    const typeof(ptr) _y = (ptr);                                    \
   65.23 -    copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));              \
   65.24 +    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
   65.25 +    typeof(*(ptr)) *_d = (ptr);                                      \
   65.26 +    copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));              \
   65.27  })
   65.28  
   65.29  #define copy_to_compat(hnd, ptr, nr)                                 \
   65.30 @@ -67,16 +68,19 @@
   65.31  
   65.32  /* Copy sub-field of a structure to guest context via a compat handle. */
   65.33  #define copy_field_to_compat(hnd, ptr, field) ({                     \
   65.34 -    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
   65.35 -    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
   65.36 -    copy_to_user(_x, _y, sizeof(*_x));                               \
   65.37 +    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
   65.38 +    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
   65.39 +    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
   65.40 +            &(ptr)->field));                                         \
   65.41 +    copy_to_user(_d, _s, sizeof(*_s));                               \
   65.42  })
   65.43  
   65.44  /* Copy sub-field of a structure from guest context via a compat handle. */
   65.45  #define copy_field_from_compat(ptr, hnd, field) ({                   \
   65.46 -    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
   65.47 -    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
   65.48 -    copy_from_user(_y, _x, sizeof(*_x));                             \
   65.49 +    const typeof(&(ptr)->field) _s =                                 \
   65.50 +        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
   65.51 +    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
   65.52 +    copy_from_user(_d, _s, sizeof(*_d));                             \
   65.53  })
   65.54  
   65.55  /*
   65.56 @@ -84,18 +88,20 @@
   65.57   * Allows use of faster __copy_* functions.
   65.58   */
   65.59  #define compat_handle_okay(hnd, nr)                                  \
   65.60 -    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._))
   65.61 +    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr),        \
   65.62 +                           sizeof(**(hnd)._))
   65.63  
   65.64  #define __copy_to_compat_offset(hnd, off, ptr, nr) ({                \
   65.65 -    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
   65.66 -    const typeof(*(ptr)) *const _y = (ptr);                          \
   65.67 -    __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));              \
   65.68 +    const typeof(*(ptr)) *_s = (ptr);                                \
   65.69 +    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
   65.70 +    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
   65.71 +    __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));              \
   65.72  })
   65.73  
   65.74  #define __copy_from_compat_offset(ptr, hnd, off, nr) ({              \
   65.75 -    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
   65.76 -    const typeof(ptr) _y = (ptr);                                    \
   65.77 -    __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));            \
   65.78 +    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
   65.79 +    typeof(*(ptr)) *_d = (ptr);                                      \
   65.80 +    __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));            \
   65.81  })
   65.82  
   65.83  #define __copy_to_compat(hnd, ptr, nr)                               \
   65.84 @@ -105,15 +111,18 @@
   65.85      __copy_from_compat_offset(ptr, hnd, 0, nr)
   65.86  
   65.87  #define __copy_field_to_compat(hnd, ptr, field) ({                   \
   65.88 -    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
   65.89 -    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
   65.90 -    __copy_to_user(_x, _y, sizeof(*_x));                             \
   65.91 +    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
   65.92 +    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
   65.93 +    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
   65.94 +            &(ptr)->field));                                         \
   65.95 +    __copy_to_user(_d, _s, sizeof(*_s));                             \
   65.96  })
   65.97  
   65.98  #define __copy_field_from_compat(ptr, hnd, field) ({                 \
   65.99 -    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
  65.100 -    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
  65.101 -    __copy_from_user(_y, _x, sizeof(*_x));                           \
  65.102 +    const typeof(&(ptr)->field) _s =                                 \
  65.103 +        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
  65.104 +    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
  65.105 +    __copy_from_user(_d, _s, sizeof(*_d));                           \
  65.106  })
  65.107  
  65.108  
  65.109 @@ -169,7 +178,8 @@ void xlat_vcpu_runstate_info(struct vcpu
  65.110  int switch_compat(struct domain *);
  65.111  int switch_native(struct domain *);
  65.112  
  65.113 -#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
  65.114 +#define BITS_PER_GUEST_LONG(d) \
  65.115 +    (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
  65.116  
  65.117  #else
  65.118  
    66.1 --- a/xen/include/xen/xencomm.h	Wed Jun 20 12:47:52 2007 -0600
    66.2 +++ b/xen/include/xen/xencomm.h	Wed Jun 20 12:49:27 2007 -0600
    66.3 @@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli
    66.4      ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
    66.5  
    66.6  /* Offset the given guest handle into the array it refers to. */
    66.7 -#define guest_handle_add_offset(hnd, nr) ({         \
    66.8 -    const typeof((hnd).p) _ptr;                     \
    66.9 -    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));   \
   66.10 +#define guest_handle_add_offset(hnd, nr) ({                             \
   66.11 +    const typeof((hnd).p) _ptr;                                         \
   66.12 +    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));        \
   66.13  })
   66.14  
   66.15  /* Cast a guest handle to the specified type of handle. */
   66.16  #define guest_handle_cast(hnd, type) ({         \
   66.17      type *_x = (hnd).p;                         \
   66.18 -    XEN_GUEST_HANDLE(type) _y; \
   66.19 -    set_xen_guest_handle(_y, _x); \
   66.20 -    _y; \
   66.21 +    XEN_GUEST_HANDLE(type) _y;                  \
   66.22 +    set_xen_guest_handle(_y, _x);               \
   66.23 +    _y;                                         \
   66.24  })
   66.25  
   66.26  /* Since we run in real mode, we can safely access all addresses. That also
   66.27 @@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli
   66.28      __copy_field_from_guest(ptr, hnd, field)
   66.29  
   66.30  #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                \
   66.31 -    const typeof(ptr) _x = (hnd).p;                                 \
   66.32 -    const typeof(ptr) _y = (ptr);                                   \
   66.33 -    xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
   66.34 +    const typeof(*(ptr)) *_s = (ptr);                               \
   66.35 +    void *_d = (hnd).p;                                             \
   66.36 +    ((void)((hnd).p == (ptr)));                                     \
   66.37 +    xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
   66.38  })
   66.39  
   66.40  #define __copy_field_to_guest(hnd, ptr, field) ({                   \
   66.41 -    const int _off = offsetof(typeof(*ptr), field);                  \
   66.42 -    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
   66.43 -    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
   66.44 -    xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
   66.45 +    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
   66.46 +    const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
   66.47 +    void *_d = (hnd).p;                                             \
   66.48 +    ((void)(&(hnd).p->field == &(ptr)->field));                     \
   66.49 +    xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
   66.50  })
   66.51  
   66.52  #define __copy_from_guest_offset(ptr, hnd, idx, nr) ({              \
   66.53 -    const typeof(ptr) _x = (hnd).p;                                 \
   66.54 -    const typeof(ptr) _y = (ptr);                                   \
   66.55 -    xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx));  \
   66.56 +    const typeof(*(ptr)) *_s = (hnd).p;                             \
   66.57 +    typeof(*(ptr)) *_d = (ptr);                                     \
   66.58 +    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
   66.59  })
   66.60  
   66.61  #define __copy_field_from_guest(ptr, hnd, field) ({                 \
   66.62 -    const int _off = offsetof(typeof(*ptr), field);                 \
   66.63 -    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
   66.64 -    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
   66.65 -    xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
   66.66 +    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
   66.67 +    const void *_s = (hnd).p;                                       \
   66.68 +    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
   66.69 +    ((void)(&(hnd).p->field == &(ptr)->field));                     \
   66.70 +    xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
   66.71  })
   66.72  
   66.73  #endif /* __XENCOMM_H__ */
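
Editor's note: observe the latent bug fixed in __copy_field_from_guest()
above. The old body called xencomm_copy_to_guest(), the wrong copy
direction for a *_from_guest macro, and multiplied the offsetof() byte
offset by the field size. A toy model of the corrected semantics (struct
and helper names are illustrative):

    #include <stddef.h>
    #include <string.h>

    struct reply { int id; long status; };

    /* Model of the fixed field copy: the offset is a plain byte
     * offset from offsetof(), and data flows guest -> local. */
    static void copy_status_from_guest(struct reply *dst,
                                       const struct reply *guest)
    {
        size_t off = offsetof(struct reply, status);

        memcpy((char *)dst + off, (const char *)guest + off,
               sizeof(dst->status));
    }
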