ia64/xen-unstable

changeset 14708:f378c424e0ce

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Tue Apr 03 13:04:51 2007 -0600 (2007-04-03)
parents fc9e2f7920c9 7e431ea834a8
children 770c465344d3 c278b1c580db
files
line diff
     1.1 --- a/README	Fri Mar 30 17:18:42 2007 -0600
     1.2 +++ b/README	Tue Apr 03 13:04:51 2007 -0600
     1.3 @@ -177,3 +177,25 @@ 5. To rebuild a kernel with a modified c
     1.4     an initial ram disk, just like a native system e.g.
     1.5      # depmod 2.6.16-xen
     1.6      # mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod initrd-2.6.16-xen.img 2.6.16-xen
     1.7 +
     1.8 +
     1.9 +Python Runtime Libraries
    1.10 +========================
    1.11 +
    1.12 +Xend (the Xen daemon) has the following runtime dependencies:
    1.13 +
    1.14 +    * Python 2.3 or later.
    1.15 +      In many distros, the XML-aspects to the standard library
    1.16 +      (xml.dom.minidom etc) are broken out into a separate python-xml package.
    1.17 +      This is also required.
    1.18 +
    1.19 +          URL:    http://www.python.org/
    1.20 +          Debian: python, python-xml
    1.21 +
    1.22 +    * For optional SSL support, pyOpenSSL:
    1.23 +          URL:    http://pyopenssl.sourceforge.net/
    1.24 +          Debian: python-pyopenssl
    1.25 +
    1.26 +    * For optional PAM support, PyPAM:
    1.27 +          URL:    http://www.pangalactic.org/PyPAM/
    1.28 +          Debian: python-pam
     2.1 --- a/docs/src/user.tex	Fri Mar 30 17:18:42 2007 -0600
     2.2 +++ b/docs/src/user.tex	Tue Apr 03 13:04:51 2007 -0600
     2.3 @@ -3250,6 +3250,10 @@ editing \path{grub.conf}.
     2.4  \item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
     2.5    pool below which ordinary allocations will fail rather than fall
     2.6    back to allocating from the DMA pool.
     2.7 +\item [ hap ] Instruct Xen to detect hardware-assisted paging support, such
     2.8 +  as AMD-V's nested paging or Intel\textregistered VT's extended paging. If 
     2.9 +  available, Xen will use hardware-assisted paging instead of shadow paging 
    2.10 +  for guest memory management.
    2.11  \end{description}
    2.12  
    2.13  In addition, the following options may be specified on the Xen command
     3.1 --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig	Fri Mar 30 17:18:42 2007 -0600
     3.2 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig	Tue Apr 03 13:04:51 2007 -0600
     3.3 @@ -576,15 +576,6 @@ source "crypto/Kconfig"
     3.4  # override default values of drivers/xen/Kconfig
     3.5  #
     3.6  if XEN
     3.7 -config XEN_UTIL
     3.8 -	default n
     3.9 -
    3.10 -config XEN_BALLOON
    3.11 -	default y
    3.12 -
    3.13 -config XEN_REBOOT
    3.14 -	default y
    3.15 -
    3.16  config XEN_SMPBOOT
    3.17  	default n
    3.18  endif
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig	Fri Mar 30 17:18:42 2007 -0600
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig	Tue Apr 03 13:04:51 2007 -0600
     4.3 @@ -253,22 +253,6 @@ config NO_IDLE_HZ
     4.4  	bool
     4.5  	default y
     4.6  
     4.7 -config XEN_UTIL
     4.8 -	bool
     4.9 -	default y
    4.10 -
    4.11 -config XEN_BALLOON
    4.12 -	bool
    4.13 -	default y
    4.14 -
    4.15 -config XEN_DEVMEM
    4.16 -	bool
    4.17 -	default y
    4.18 -
    4.19 -config XEN_REBOOT
    4.20 -	bool
    4.21 -	default y
    4.22 -
    4.23  config XEN_SMPBOOT
    4.24  	bool
    4.25  	default y
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/Makefile	Fri Mar 30 17:18:42 2007 -0600
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile	Tue Apr 03 13:04:51 2007 -0600
     5.3 @@ -3,10 +3,11 @@ obj-y	+= console/
     5.4  obj-y	+= evtchn/
     5.5  obj-y	+= privcmd/
     5.6  obj-y	+= xenbus/
     5.7 +obj-y	+= gntdev/
     5.8 +obj-y	+= balloon/
     5.9 +obj-y	+= char/
    5.10  
    5.11 -obj-$(CONFIG_XEN_UTIL)			+= util.o
    5.12 -obj-$(CONFIG_XEN_BALLOON)		+= balloon/
    5.13 -obj-$(CONFIG_XEN_DEVMEM)		+= char/
    5.14 +obj-y	+= util.o
    5.15  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
    5.16  obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
    5.17  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Mar 30 17:18:42 2007 -0600
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Tue Apr 03 13:04:51 2007 -0600
     6.3 @@ -44,6 +44,7 @@
     6.4  #include <asm/hypervisor.h>
     6.5  #include "common.h"
     6.6  #include <xen/balloon.h>
     6.7 +#include <xen/driver_util.h>
     6.8  #include <linux/kernel.h>
     6.9  #include <linux/fs.h>
    6.10  #include <linux/mm.h>
    6.11 @@ -56,30 +57,6 @@
    6.12  #define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
    6.13  #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
    6.14  
    6.15 -
    6.16 -struct class *xen_class;
    6.17 -EXPORT_SYMBOL_GPL(xen_class);
    6.18 -
    6.19 -/*
    6.20 - * Setup the xen class.  This should probably go in another file, but
    6.21 - * since blktap is the only user of it so far, it gets to keep it.
    6.22 - */
    6.23 -int setup_xen_class(void)
    6.24 -{
    6.25 -	int ret;
    6.26 -
    6.27 -	if (xen_class)
    6.28 -		return 0;
    6.29 -
    6.30 -	xen_class = class_create(THIS_MODULE, "xen");
    6.31 -	if ((ret = IS_ERR(xen_class))) {
    6.32 -		xen_class = NULL;
    6.33 -		return ret;
    6.34 -	}
    6.35 -
    6.36 -	return 0;
    6.37 -}
    6.38 -
    6.39  /*
    6.40   * The maximum number of requests that can be outstanding at any time
    6.41   * is determined by 
    6.42 @@ -347,6 +324,7 @@ static const struct file_operations blkt
    6.43  
    6.44  static tap_blkif_t *get_next_free_dev(void)
    6.45  {
    6.46 +	struct class *class;
    6.47  	tap_blkif_t *info;
    6.48  	int minor;
    6.49  
    6.50 @@ -409,9 +387,10 @@ found:
    6.51  		wmb();
    6.52  		tapfds[minor] = info;
    6.53  
    6.54 -		class_device_create(xen_class, NULL,
    6.55 -				    MKDEV(blktap_major, minor), NULL,
    6.56 -				    "blktap%d", minor);
    6.57 +		if ((class = get_xen_class()) != NULL)
    6.58 +			class_device_create(class, NULL,
    6.59 +					    MKDEV(blktap_major, minor), NULL,
    6.60 +					    "blktap%d", minor);
    6.61  	}
    6.62  
    6.63  out:
    6.64 @@ -1487,6 +1466,7 @@ static void make_response(blkif_t *blkif
    6.65  static int __init blkif_init(void)
    6.66  {
    6.67  	int i, ret;
    6.68 +	struct class *class;
    6.69  
    6.70  	if (!is_running_on_xen())
    6.71  		return -ENODEV;
    6.72 @@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
    6.73  	DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
    6.74  
    6.75  	/* Make sure the xen class exists */
    6.76 -	if (!setup_xen_class()) {
    6.77 +	if ((class = get_xen_class()) != NULL) {
    6.78  		/*
    6.79  		 * This will allow udev to create the blktap ctrl device.
    6.80  		 * We only want to create blktap0 first.  We don't want
    6.81 @@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
    6.82  		 * We only create the device when a request of a new device is
    6.83  		 * made.
    6.84  		 */
    6.85 -		class_device_create(xen_class, NULL,
    6.86 +		class_device_create(class, NULL,
    6.87  				    MKDEV(blktap_major, 0), NULL,
    6.88  				    "blktap0");
    6.89  	} else {
     7.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Fri Mar 30 17:18:42 2007 -0600
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Tue Apr 03 13:04:51 2007 -0600
     7.3 @@ -2,12 +2,11 @@
     7.4  # Makefile for the linux kernel.
     7.5  #
     7.6  
     7.7 -obj-y := evtchn.o gnttab.o features.o
     7.8 +obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o
     7.9  
    7.10  obj-$(CONFIG_PROC_FS)		+= xen_proc.o
    7.11  obj-$(CONFIG_SYSFS)		+= hypervisor_sysfs.o
    7.12  obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
    7.13  obj-$(CONFIG_XEN_SYSFS)		+= xen_sysfs.o
    7.14 -obj-$(CONFIG_XEN_REBOOT)	+= reboot.o machine_reboot.o
    7.15  obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
    7.16  obj-$(CONFIG_KEXEC)		+= machine_kexec.o
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile	Tue Apr 03 13:04:51 2007 -0600
     8.3 @@ -0,0 +1,1 @@
     8.4 +obj-y	:= gntdev.o
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c	Tue Apr 03 13:04:51 2007 -0600
     9.3 @@ -0,0 +1,973 @@
     9.4 +/******************************************************************************
     9.5 + * gntdev.c
     9.6 + * 
     9.7 + * Device for accessing (in user-space) pages that have been granted by other
     9.8 + * domains.
     9.9 + *
    9.10 + * Copyright (c) 2006-2007, D G Murray.
    9.11 + * 
    9.12 + * This program is distributed in the hope that it will be useful,
    9.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    9.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    9.15 + * GNU General Public License for more details.
    9.16 + * 
    9.17 + * You should have received a copy of the GNU General Public License
    9.18 + * along with this program; if not, write to the Free Software
    9.19 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    9.20 + */
    9.21 +
    9.22 +#include <asm/atomic.h>
    9.23 +#include <linux/module.h>
    9.24 +#include <linux/kernel.h>
    9.25 +#include <linux/init.h>
    9.26 +#include <linux/fs.h>
    9.27 +#include <linux/device.h>
    9.28 +#include <linux/mm.h>
    9.29 +#include <linux/mman.h>
    9.30 +#include <asm/uaccess.h>
    9.31 +#include <asm/io.h>
    9.32 +#include <xen/gnttab.h>
    9.33 +#include <asm/hypervisor.h>
    9.34 +#include <xen/balloon.h>
    9.35 +#include <xen/evtchn.h>
    9.36 +#include <xen/driver_util.h>
    9.37 +
    9.38 +#include <linux/types.h>
    9.39 +#include <xen/public/gntdev.h>
    9.40 +
    9.41 +
    9.42 +#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
    9.43 +#define DRIVER_DESC   "User-space granted page access driver"
    9.44 +
    9.45 +MODULE_LICENSE("GPL");
    9.46 +MODULE_AUTHOR(DRIVER_AUTHOR);
    9.47 +MODULE_DESCRIPTION(DRIVER_DESC);
    9.48 +
    9.49 +#define MAX_GRANTS 128
    9.50 +
    9.51 +/* A slot can be in one of three states:
    9.52 + *
    9.53 + * 0. GNTDEV_SLOT_INVALID:
    9.54 + *    This slot is not associated with a grant reference, and is therefore free
    9.55 + *    to be overwritten by a new grant reference.
    9.56 + *
    9.57 + * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
    9.58 + *    This slot is associated with a grant reference (via the 
    9.59 + *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
    9.60 + *
    9.61 + * 2. GNTDEV_SLOT_MAPPED:
    9.62 + *    This slot is associated with a grant reference, and has been mmap()-ed.
    9.63 + */
    9.64 +typedef enum gntdev_slot_state {
    9.65 +	GNTDEV_SLOT_INVALID = 0,
    9.66 +	GNTDEV_SLOT_NOT_YET_MAPPED,
    9.67 +	GNTDEV_SLOT_MAPPED
    9.68 +} gntdev_slot_state_t;
    9.69 +
    9.70 +#define GNTDEV_INVALID_HANDLE    -1
    9.71 +#define GNTDEV_FREE_LIST_INVALID -1
    9.72 +/* Each opened instance of gntdev is associated with a list of grants,
    9.73 + * represented by an array of elements of the following type,
    9.74 + * gntdev_grant_info_t.
    9.75 + */
    9.76 +typedef struct gntdev_grant_info {
    9.77 +	gntdev_slot_state_t state;
    9.78 +	union {
    9.79 +		uint32_t free_list_index;
    9.80 +		struct {
    9.81 +			domid_t domid;
    9.82 +			grant_ref_t ref;
    9.83 +			grant_handle_t kernel_handle;
    9.84 +			grant_handle_t user_handle;
    9.85 +			uint64_t dev_bus_addr;
    9.86 +		} valid;
    9.87 +	} u;
    9.88 +} gntdev_grant_info_t;
    9.89 +
    9.90 +/* Private data structure, which is stored in the file pointer for files
    9.91 + * associated with this device.
    9.92 + */
    9.93 +typedef struct gntdev_file_private_data {
    9.94 +  
    9.95 +	/* Array of grant information. */
    9.96 +	gntdev_grant_info_t grants[MAX_GRANTS];
    9.97 +
    9.98 +	/* Read/write semaphore used to protect the grants array. */
    9.99 +	struct rw_semaphore grants_sem;
   9.100 +
   9.101 +	/* An array of indices of free slots in the grants array.
   9.102 +	 * N.B. An entry in this list may temporarily have the value
   9.103 +	 * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
   9.104 +	 * from the list by the contiguous allocator, but the list has not yet
   9.105 +	 * been compressed. However, this is not visible across invocations of
   9.106 +	 * the device.
   9.107 +	 */
   9.108 +	int32_t free_list[MAX_GRANTS];
   9.109 +	
   9.110 +	/* The number of free slots in the grants array. */
   9.111 +	uint32_t free_list_size;
   9.112 +
   9.113 +	/* Read/write semaphore used to protect the free list. */
   9.114 +	struct rw_semaphore free_list_sem;
   9.115 +	
   9.116 +	/* Index of the next slot after the most recent contiguous allocation, 
   9.117 +	 * for use in a next-fit allocator.
   9.118 +	 */
   9.119 +	uint32_t next_fit_index;
   9.120 +
   9.121 +	/* Used to map grants into the kernel, before mapping them into user
   9.122 +	 * space.
   9.123 +	 */
   9.124 +	struct page **foreign_pages;
   9.125 +
   9.126 +} gntdev_file_private_data_t;
   9.127 +
   9.128 +/* Module lifecycle operations. */
   9.129 +static int __init gntdev_init(void);
   9.130 +static void __exit gntdev_exit(void);
   9.131 +
   9.132 +module_init(gntdev_init);
   9.133 +module_exit(gntdev_exit);
   9.134 +
   9.135 +/* File operations. */
   9.136 +static int gntdev_open(struct inode *inode, struct file *flip);
   9.137 +static int gntdev_release(struct inode *inode, struct file *flip);
   9.138 +static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
   9.139 +static int gntdev_ioctl (struct inode *inode, struct file *flip,
   9.140 +			 unsigned int cmd, unsigned long arg);
   9.141 +
   9.142 +static struct file_operations gntdev_fops = {
   9.143 +	.owner = THIS_MODULE,
   9.144 +	.open = gntdev_open,
   9.145 +	.release = gntdev_release,
   9.146 +	.mmap = gntdev_mmap,
   9.147 +	.ioctl = gntdev_ioctl
   9.148 +};
   9.149 +
   9.150 +/* VM operations. */
   9.151 +static void gntdev_vma_close(struct vm_area_struct *vma);
   9.152 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
   9.153 +			      pte_t *ptep, int is_fullmm);
   9.154 +
   9.155 +static struct vm_operations_struct gntdev_vmops = {
   9.156 +	.close = gntdev_vma_close,
   9.157 +	.zap_pte = gntdev_clear_pte
   9.158 +};
   9.159 +
   9.160 +/* Global variables. */
   9.161 +
   9.162 +/* The driver major number, for use when unregistering the driver. */
   9.163 +static int gntdev_major;
   9.164 +
   9.165 +#define GNTDEV_NAME "gntdev"
   9.166 +
   9.167 +/* Memory mapping functions
   9.168 + * ------------------------
   9.169 + *
   9.170 + * Every granted page is mapped into both kernel and user space, and the two
   9.171 + * following functions return the respective virtual addresses of these pages.
   9.172 + *
   9.173 + * When shadow paging is disabled, the granted page is mapped directly into
   9.174 + * user space; when it is enabled, it is mapped into the kernel and remapped
   9.175 + * into user space using vm_insert_page() (see gntdev_mmap(), below).
   9.176 + */
   9.177 +
   9.178 +/* Returns the virtual address (in user space) of the @page_index'th page
   9.179 + * in the given VM area.
   9.180 + */
   9.181 +static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
   9.182 +					    int page_index)
   9.183 +{
   9.184 +	return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
   9.185 +}
   9.186 +
   9.187 +/* Returns the virtual address (in kernel space) of the @slot_index'th page
   9.188 + * mapped by the gntdev instance that owns the given private data struct.
   9.189 + */
   9.190 +static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
   9.191 +					      int slot_index)
   9.192 +{
   9.193 +	unsigned long pfn;
   9.194 +	void *kaddr;
   9.195 +	pfn = page_to_pfn(priv->foreign_pages[slot_index]);
   9.196 +	kaddr = pfn_to_kaddr(pfn);
   9.197 +	return (unsigned long) kaddr;
   9.198 +}
   9.199 +
   9.200 +/* Helper functions. */
   9.201 +
   9.202 +/* Adds information about a grant reference to the list of grants in the file's
   9.203 + * private data structure. Returns non-zero on failure. On success, sets the
   9.204 + * value of *offset to the offset that should be mmap()-ed in order to map the
   9.205 + * grant reference.
   9.206 + */
   9.207 +static int add_grant_reference(struct file *flip,
   9.208 +			       struct ioctl_gntdev_grant_ref *op,
   9.209 +			       uint64_t *offset)
   9.210 +{
   9.211 +	gntdev_file_private_data_t *private_data 
   9.212 +		= (gntdev_file_private_data_t *) flip->private_data;
   9.213 +
   9.214 +	uint32_t slot_index;
   9.215 +
   9.216 +	if (unlikely(private_data->free_list_size == 0)) {
   9.217 +		return -ENOMEM;
   9.218 +	}
   9.219 +
   9.220 +	slot_index = private_data->free_list[--private_data->free_list_size];
   9.221 +
   9.222 +	/* Copy the grant information into file's private data. */
   9.223 +	private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
   9.224 +	private_data->grants[slot_index].u.valid.domid = op->domid;
   9.225 +	private_data->grants[slot_index].u.valid.ref = op->ref;
   9.226 +
   9.227 +	/* The offset is calculated as the index of the chosen entry in the
   9.228 +	 * file's private data's array of grant information. This is then
   9.229 +	 * shifted to give an offset into the virtual "file address space".
   9.230 +	 */
   9.231 +	*offset = slot_index << PAGE_SHIFT;
   9.232 +
   9.233 +	return 0;
   9.234 +}
   9.235 +
   9.236 +/* Adds the @count grant references to the contiguous range in the slot array
   9.237 + * beginning at @first_slot. It is assumed that @first_slot was returned by a
   9.238 + * previous invocation of find_contiguous_free_range(), during the same
   9.239 + * invocation of the driver.
   9.240 + */
   9.241 +static int add_grant_references(struct file *flip,
   9.242 +				int count,
   9.243 +				struct ioctl_gntdev_grant_ref *ops,
   9.244 +				uint32_t first_slot)
   9.245 +{
   9.246 +	gntdev_file_private_data_t *private_data 
   9.247 +		= (gntdev_file_private_data_t *) flip->private_data;
   9.248 +	int i;
   9.249 +	
   9.250 +	for (i = 0; i < count; ++i) {
   9.251 +
   9.252 +		/* First, mark the slot's entry in the free list as invalid. */
   9.253 +		int free_list_index = 
   9.254 +			private_data->grants[first_slot+i].u.free_list_index;
   9.255 +		private_data->free_list[free_list_index] = 
   9.256 +			GNTDEV_FREE_LIST_INVALID;
   9.257 +
   9.258 +		/* Now, update the slot. */
   9.259 +		private_data->grants[first_slot+i].state = 
   9.260 +			GNTDEV_SLOT_NOT_YET_MAPPED;
   9.261 +		private_data->grants[first_slot+i].u.valid.domid =
   9.262 +			ops[i].domid;
   9.263 +		private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
   9.264 +	}
   9.265 +
   9.266 +	return 0;	
   9.267 +}
   9.268 +
   9.269 +/* Scans through the free list for @flip, removing entries that are marked as
   9.270 + * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to
   9.271 + * the number of valid entries.
   9.272 + */
   9.273 +static void compress_free_list(struct file *flip) 
   9.274 +{
   9.275 +	gntdev_file_private_data_t *private_data 
   9.276 +		= (gntdev_file_private_data_t *) flip->private_data;
   9.277 +	int i, j = 0, old_size;
   9.278 +	
   9.279 +	old_size = private_data->free_list_size;
   9.280 +	for (i = 0; i < old_size; ++i) {
   9.281 +		if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
   9.282 +			private_data->free_list[j] = 
   9.283 +				private_data->free_list[i];
   9.284 +			++j;
   9.285 +		} else {
   9.286 +			--private_data->free_list_size;
   9.287 +		}
   9.288 +	}
   9.289 +}
   9.290 +
   9.291 +/* Searches the grant array in the private data of @flip for a range of
   9.292 + * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
   9.293 + *
   9.294 + * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
   9.295 + */
   9.296 +static int find_contiguous_free_range(struct file *flip,
   9.297 +				      uint32_t num_slots) 
   9.298 +{
   9.299 +	gntdev_file_private_data_t *private_data 
   9.300 +		= (gntdev_file_private_data_t *) flip->private_data;
   9.301 +	
   9.302 +	int i;
   9.303 +	int start_index = private_data->next_fit_index;
   9.304 +	int range_start = 0, range_length;
   9.305 +
   9.306 +	if (private_data->free_list_size < num_slots) {
   9.307 +		return -ENOMEM;
   9.308 +	}
   9.309 +
   9.310 +	/* First search from the start_index to the end of the array. */
   9.311 +	range_length = 0;
   9.312 +	for (i = start_index; i < MAX_GRANTS; ++i) {
   9.313 +		if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
   9.314 +			if (range_length == 0) {
   9.315 +				range_start = i;
   9.316 +			}
   9.317 +			++range_length;
   9.318 +			if (range_length == num_slots) {
   9.319 +				return range_start;
   9.320 +			}
   9.321 +		}
   9.322 +	}
   9.323 +	
   9.324 +	/* Now search from the start of the array to the start_index. */
   9.325 +	range_length = 0;
   9.326 +	for (i = 0; i < start_index; ++i) {
   9.327 +		if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
   9.328 +			if (range_length == 0) {
   9.329 +				range_start = i;
   9.330 +			}
   9.331 +			++range_length;
   9.332 +			if (range_length == num_slots) {
   9.333 +				return range_start;
   9.334 +			}
   9.335 +		}
   9.336 +	}
   9.337 +	
   9.338 +	return -ENOMEM;
   9.339 +}
   9.340 +
   9.341 +/* Interface functions. */
   9.342 +
   9.343 +/* Initialises the driver. Called when the module is loaded. */
   9.344 +static int __init gntdev_init(void)
   9.345 +{
   9.346 +	struct class *class;
   9.347 +	struct class_device *device;
   9.348 +
   9.349 +	if (!is_running_on_xen()) {
   9.350 +		printk(KERN_ERR "You must be running Xen to use gntdev\n");
   9.351 +		return -ENODEV;
   9.352 +	}
   9.353 +
   9.354 +	gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
   9.355 +	if (gntdev_major < 0)
   9.356 +	{
   9.357 +		printk(KERN_ERR "Could not register gntdev device\n");
   9.358 +		return -ENOMEM;
   9.359 +	}
   9.360 +
   9.361 +	/* Note that if the sysfs code fails, we will still initialise the
   9.362 +	 * device, and output the major number so that the device can be
   9.363 +	 * created manually using mknod.
   9.364 +	 */
   9.365 +	if ((class = get_xen_class()) == NULL) {
   9.366 +		printk(KERN_ERR "Error setting up xen_class\n");
   9.367 +		printk(KERN_ERR "gntdev created with major number = %d\n", 
   9.368 +		       gntdev_major);
   9.369 +		return 0;
   9.370 +	}
   9.371 +
   9.372 +	device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
   9.373 +				     NULL, GNTDEV_NAME);
   9.374 +	if (IS_ERR(device)) {
   9.375 +		printk(KERN_ERR "Error creating gntdev device in xen_class\n");
   9.376 +		printk(KERN_ERR "gntdev created with major number = %d\n",
   9.377 +		       gntdev_major);
   9.378 +		return 0;
   9.379 +	}
   9.380 +
   9.381 +	return 0;
   9.382 +}
   9.383 +
   9.384 +/* Cleans up and unregisters the driver. Called when the driver is unloaded.
   9.385 + */
   9.386 +static void __exit gntdev_exit(void)
   9.387 +{
   9.388 +	struct class *class;
   9.389 +	if ((class = get_xen_class()) != NULL)
   9.390 +		class_device_destroy(class, MKDEV(gntdev_major, 0));
   9.391 +	unregister_chrdev(gntdev_major, GNTDEV_NAME);
   9.392 +}
   9.393 +
   9.394 +/* Called when the device is opened. */
   9.395 +static int gntdev_open(struct inode *inode, struct file *flip)
   9.396 +{
   9.397 +	gntdev_file_private_data_t *private_data;
   9.398 +	int i;
   9.399 +
   9.400 +	try_module_get(THIS_MODULE);
   9.401 +
   9.402 +	/* Allocate space for the per-instance private data. */
   9.403 +	private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
   9.404 +	if (!private_data)
   9.405 +		goto nomem_out;
   9.406 +
   9.407 +	/* Allocate space for the kernel-mapping of granted pages. */
   9.408 +	private_data->foreign_pages = 
   9.409 +		alloc_empty_pages_and_pagevec(MAX_GRANTS);
   9.410 +	if (!private_data->foreign_pages)
   9.411 +		goto nomem_out2;
   9.412 +
   9.413 +	/* Initialise the free-list, which contains all slots at first.
   9.414 +	 */
   9.415 +	for (i = 0; i < MAX_GRANTS; ++i) {
   9.416 +		private_data->free_list[MAX_GRANTS - i - 1] = i;
   9.417 +		private_data->grants[i].state = GNTDEV_SLOT_INVALID;
   9.418 +		private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
   9.419 +	}
   9.420 +	private_data->free_list_size = MAX_GRANTS;
   9.421 +	private_data->next_fit_index = 0;
   9.422 +
   9.423 +	init_rwsem(&private_data->grants_sem);
   9.424 +	init_rwsem(&private_data->free_list_sem);
   9.425 +
   9.426 +	flip->private_data = private_data;
   9.427 +
   9.428 +	return 0;
   9.429 +
   9.430 +nomem_out2:
   9.431 +	kfree(private_data);
   9.432 +nomem_out:
   9.433 +	return -ENOMEM;
   9.434 +}
   9.435 +
   9.436 +/* Called when the device is closed.
   9.437 + */
   9.438 +static int gntdev_release(struct inode *inode, struct file *flip)
   9.439 +{
   9.440 +	if (flip->private_data) {
   9.441 +		gntdev_file_private_data_t *private_data = 
   9.442 +			(gntdev_file_private_data_t *) flip->private_data;
   9.443 +		if (private_data->foreign_pages) {
   9.444 +			free_empty_pages_and_pagevec
   9.445 +				(private_data->foreign_pages, MAX_GRANTS);
   9.446 +		}
   9.447 +		kfree(private_data);
   9.448 +	}
   9.449 +	module_put(THIS_MODULE);
   9.450 +	return 0;
   9.451 +}
   9.452 +
   9.453 +/* Called when an attempt is made to mmap() the device. The private data from
   9.454 + * @flip contains the list of grant references that can be mapped. The vm_pgoff
   9.455 + * field of @vma contains the index into that list that refers to the grant
   9.456 + * reference that will be mapped. Only mappings that are a multiple of
   9.457 + * PAGE_SIZE are handled.
   9.458 + */
   9.459 +static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
   9.460 +{
   9.461 +	struct gnttab_map_grant_ref op;
   9.462 +	unsigned long slot_index = vma->vm_pgoff;
   9.463 +	unsigned long kernel_vaddr, user_vaddr;
   9.464 +	uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   9.465 +	uint64_t ptep;
   9.466 +	int ret;
   9.467 +	int flags;
   9.468 +	int i;
   9.469 +	struct page *page;
   9.470 +	gntdev_file_private_data_t *private_data = flip->private_data;
   9.471 +
   9.472 +	if (unlikely(!private_data)) {
   9.473 +		printk(KERN_ERR "File's private data is NULL.\n");
   9.474 +		return -EINVAL;
   9.475 +	}
   9.476 +
   9.477 +	if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) {
   9.478 +		printk(KERN_ERR "Invalid number of pages or offset"
   9.479 +		       "(num_pages = %d, first_slot = %ld).\n",
   9.480 +		       size, slot_index);
   9.481 +		return -ENXIO;
   9.482 +	}
   9.483 +
   9.484 +	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
   9.485 +		printk(KERN_ERR "Writable mappings must be shared.\n");
   9.486 +		return -EINVAL;
   9.487 +	}
   9.488 +
   9.489 +	/* Slots must be in the NOT_YET_MAPPED state. */
   9.490 +	down_write(&private_data->grants_sem);
   9.491 +	for (i = 0; i < size; ++i) {
   9.492 +		if (private_data->grants[slot_index + i].state != 
   9.493 +		    GNTDEV_SLOT_NOT_YET_MAPPED) {
   9.494 +			printk(KERN_ERR "Slot (index = %ld) is in the wrong "
   9.495 +			       "state (%d).\n", slot_index + i, 
   9.496 +			       private_data->grants[slot_index + i].state);
   9.497 +			up_write(&private_data->grants_sem);
   9.498 +			return -EINVAL;
   9.499 +		}
   9.500 +	}
   9.501 +
   9.502 +	/* Install the hook for unmapping. */
   9.503 +	vma->vm_ops = &gntdev_vmops;
   9.504 +    
   9.505 +	/* The VM area contains pages from another VM. */
   9.506 +	vma->vm_flags |= VM_FOREIGN;
   9.507 +	vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *), 
   9.508 +				       GFP_KERNEL);
   9.509 +	if (vma->vm_private_data == NULL) {
   9.510 +		printk(KERN_ERR "Couldn't allocate mapping structure for VM "
   9.511 +		       "area.\n");
   9.512 +		return -ENOMEM;
   9.513 +	}
   9.514 +
   9.515 +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
   9.516 +	vma->vm_flags |= VM_RESERVED;
   9.517 +
   9.518 +	/* This flag prevents this VM area being copied on a fork(). A better
   9.519 +	 * behaviour might be to explicitly carry out the appropriate mappings
   9.520 +	 * on fork(), but I don't know if there's a hook for this.
   9.521 +	 */
   9.522 +	vma->vm_flags |= VM_DONTCOPY;
   9.523 +
   9.524 +#ifdef CONFIG_X86
   9.525 +	/* This flag ensures that the page tables are not unpinned before the
   9.526 +	 * VM area is unmapped. Therefore Xen still recognises the PTE as
   9.527 +	 * belonging to an L1 pagetable, and the grant unmap operation will
   9.528 +	 * succeed, even if the process does not exit cleanly.
   9.529 +	 */
   9.530 +	vma->vm_mm->context.has_foreign_mappings = 1;
   9.531 +#endif
   9.532 +
   9.533 +	for (i = 0; i < size; ++i) {
   9.534 +
   9.535 +		flags = GNTMAP_host_map;
   9.536 +		if (!(vma->vm_flags & VM_WRITE))
   9.537 +			flags |= GNTMAP_readonly;
   9.538 +
   9.539 +		kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
   9.540 +		user_vaddr = get_user_vaddr(vma, i);
   9.541 +		page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
   9.542 +
   9.543 +		gnttab_set_map_op(&op, kernel_vaddr, flags,   
   9.544 +				  private_data->grants[slot_index+i]
   9.545 +				  .u.valid.ref, 
   9.546 +				  private_data->grants[slot_index+i]
   9.547 +				  .u.valid.domid);
   9.548 +
   9.549 +		/* Carry out the mapping of the grant reference. */
   9.550 +		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
   9.551 +						&op, 1);
   9.552 +		BUG_ON(ret);
   9.553 +		if (op.status) {
   9.554 +			printk(KERN_ERR "Error mapping the grant reference "
   9.555 +			       "into the kernel (%d). domid = %d; ref = %d\n",
   9.556 +			       op.status,
   9.557 +			       private_data->grants[slot_index+i]
   9.558 +			       .u.valid.domid,
   9.559 +			       private_data->grants[slot_index+i]
   9.560 +			       .u.valid.ref);
   9.561 +			goto undo_map_out;
   9.562 +		}
   9.563 +
   9.564 +		/* Store a reference to the page that will be mapped into user
   9.565 +		 * space.
   9.566 +		 */
   9.567 +		((struct page **) vma->vm_private_data)[i] = page;
   9.568 +
   9.569 +		/* Mark mapped page as reserved. */
   9.570 +		SetPageReserved(page);
   9.571 +
   9.572 +		/* Record the grant handle, for use in the unmap operation. */
   9.573 +		private_data->grants[slot_index+i].u.valid.kernel_handle = 
   9.574 +			op.handle;
   9.575 +		private_data->grants[slot_index+i].u.valid.dev_bus_addr = 
   9.576 +			op.dev_bus_addr;
   9.577 +		
   9.578 +		private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
   9.579 +		private_data->grants[slot_index+i].u.valid.user_handle =
   9.580 +			GNTDEV_INVALID_HANDLE;
   9.581 +
   9.582 +		/* Now perform the mapping to user space. */
   9.583 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   9.584 +
   9.585 +			/* NOT USING SHADOW PAGE TABLES. */
   9.586 +			/* In this case, we map the grant(s) straight into user
   9.587 +			 * space.
   9.588 +			 */
   9.589 +
   9.590 +			/* Get the machine address of the PTE for the user 
   9.591 +			 *  page.
   9.592 +			 */
   9.593 +			if ((ret = create_lookup_pte_addr(vma->vm_mm, 
   9.594 +							  vma->vm_start 
   9.595 +							  + (i << PAGE_SHIFT), 
   9.596 +							  &ptep)))
   9.597 +			{
   9.598 +				printk(KERN_ERR "Error obtaining PTE pointer "
   9.599 +				       "(%d).\n", ret);
   9.600 +				goto undo_map_out;
   9.601 +			}
   9.602 +			
   9.603 +			/* Configure the map operation. */
   9.604 +		
   9.605 +			/* The reference is to be used by host CPUs. */
   9.606 +			flags = GNTMAP_host_map;
   9.607 +			
   9.608 +			/* Specifies a user space mapping. */
   9.609 +			flags |= GNTMAP_application_map;
   9.610 +			
   9.611 +			/* The map request contains the machine address of the
   9.612 +			 * PTE to update.
   9.613 +			 */
   9.614 +			flags |= GNTMAP_contains_pte;
   9.615 +			
   9.616 +			if (!(vma->vm_flags & VM_WRITE))
   9.617 +				flags |= GNTMAP_readonly;
   9.618 +
   9.619 +			gnttab_set_map_op(&op, ptep, flags, 
   9.620 +					  private_data->grants[slot_index+i]
   9.621 +					  .u.valid.ref, 
   9.622 +					  private_data->grants[slot_index+i]
   9.623 +					  .u.valid.domid);
   9.624 +
   9.625 +			/* Carry out the mapping of the grant reference. */
   9.626 +			ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
   9.627 +							&op, 1);
   9.628 +			BUG_ON(ret);
   9.629 +			if (op.status) {
   9.630 +				printk(KERN_ERR "Error mapping the grant "
   9.631 +				       "reference into user space (%d). domid "
   9.632 +				       "= %d; ref = %d\n", op.status,
   9.633 +				       private_data->grants[slot_index+i].u
   9.634 +				       .valid.domid,
   9.635 +				       private_data->grants[slot_index+i].u
   9.636 +				       .valid.ref);
   9.637 +				goto undo_map_out;
   9.638 +			}
   9.639 +			
   9.640 +			/* Record the grant handle, for use in the unmap 
   9.641 +			 * operation. 
   9.642 +			 */
   9.643 +			private_data->grants[slot_index+i].u.
   9.644 +				valid.user_handle = op.handle;
   9.645 +
   9.646 +			/* Update p2m structure with the new mapping. */
   9.647 +			set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
   9.648 +					    FOREIGN_FRAME(private_data->
   9.649 +							  grants[slot_index+i]
   9.650 +							  .u.valid.dev_bus_addr
   9.651 +							  >> PAGE_SHIFT));
   9.652 +		} else {
   9.653 +			/* USING SHADOW PAGE TABLES. */
   9.654 +			/* In this case, we simply insert the page into the VM
   9.655 +			 * area. */
   9.656 +			ret = vm_insert_page(vma, user_vaddr, page);
   9.657 +		}
   9.658 +
   9.659 +	}
   9.660 +
   9.661 +	up_write(&private_data->grants_sem);
   9.662 +	return 0;
   9.663 +
   9.664 +undo_map_out:
   9.665 +	/* If we have a mapping failure, the unmapping will be taken care of
   9.666 +	 * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
   9.667 +	 * All we need to do here is free the vma_private_data.
   9.668 +	 */
   9.669 +	kfree(vma->vm_private_data);
   9.670 +
   9.671 +	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
   9.672 +	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
   9.673 +	 * unmap the grants. Therefore, we smuggle a reference to the file's
   9.674 +	 * private data in the VM area's private data pointer.
   9.675 +	 */
   9.676 +	vma->vm_private_data = private_data;
   9.677 +	
   9.678 +	up_write(&private_data->grants_sem);
   9.679 +
   9.680 +	return -ENOMEM;
   9.681 +}
   9.682 +
   9.683 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
   9.684 +			      pte_t *ptep, int is_fullmm)
   9.685 +{
   9.686 +	int slot_index, ret;
   9.687 +	pte_t copy;
   9.688 +	struct gnttab_unmap_grant_ref op;
   9.689 +	gntdev_file_private_data_t *private_data;
   9.690 +
   9.691 +	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
   9.692 +	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
   9.693 +	 * unmap the grants. Therefore, we smuggle a reference to the file's
   9.694 +	 * private data in the VM area's private data pointer.
   9.695 +	 */
   9.696 +	if (vma->vm_file) {
   9.697 +		private_data = (gntdev_file_private_data_t *)
   9.698 +			vma->vm_file->private_data;
   9.699 +	} else if (vma->vm_private_data) {
   9.700 +		private_data = (gntdev_file_private_data_t *)
   9.701 +			vma->vm_private_data;
   9.702 +	} else {
   9.703 +		private_data = NULL; /* gcc warning */
   9.704 +		BUG();
   9.705 +	}
   9.706 +
   9.707 +	/* Copy the existing value of the PTE for returning. */
   9.708 +	copy = *ptep;
   9.709 +
   9.710 +	/* Calculate the grant relating to this PTE. */
   9.711 +	slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
   9.712 +
   9.713 +	/* Only unmap grants if the slot has been mapped. This could be being
   9.714 +	 * called from a failing mmap().
   9.715 +	 */
   9.716 +	if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
   9.717 +
   9.718 +		/* First, we clear the user space mapping, if it has been made.
   9.719 +		 */
   9.720 +		if (private_data->grants[slot_index].u.valid.user_handle !=
   9.721 +		    GNTDEV_INVALID_HANDLE && 
   9.722 +		    !xen_feature(XENFEAT_auto_translated_physmap)) {
   9.723 +			/* NOT USING SHADOW PAGE TABLES. */
   9.724 +			gnttab_set_unmap_op(&op, virt_to_machine(ptep), 
   9.725 +					    GNTMAP_contains_pte,
   9.726 +					    private_data->grants[slot_index]
   9.727 +					    .u.valid.user_handle);
   9.728 +			ret = HYPERVISOR_grant_table_op(
   9.729 +				GNTTABOP_unmap_grant_ref, &op, 1);
   9.730 +			BUG_ON(ret);
   9.731 +			if (op.status)
   9.732 +				printk("User unmap grant status = %d\n", 
   9.733 +				       op.status);
   9.734 +		} else {
   9.735 +			/* USING SHADOW PAGE TABLES. */
   9.736 +			pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
   9.737 +		}
   9.738 +
   9.739 +		/* Finally, we unmap the grant from kernel space. */
   9.740 +		gnttab_set_unmap_op(&op, 
   9.741 +				    get_kernel_vaddr(private_data, slot_index),
   9.742 +				    GNTMAP_host_map, 
   9.743 +				    private_data->grants[slot_index].u.valid
   9.744 +				    .kernel_handle);
   9.745 +		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 
   9.746 +						&op, 1);
   9.747 +		BUG_ON(ret);
   9.748 +		if (op.status)
   9.749 +			printk("Kernel unmap grant status = %d\n", op.status);
   9.750 +
   9.751 +
   9.752 +		/* Return slot to the not-yet-mapped state, so that it may be
   9.753 +		 * mapped again, or removed by a subsequent ioctl.
   9.754 +		 */
   9.755 +		private_data->grants[slot_index].state = 
   9.756 +			GNTDEV_SLOT_NOT_YET_MAPPED;
   9.757 +
   9.758 +		/* Invalidate the physical to machine mapping for this page. */
   9.759 +		set_phys_to_machine(__pa(get_kernel_vaddr(private_data, 
   9.760 +							  slot_index)) 
   9.761 +				    >> PAGE_SHIFT, INVALID_P2M_ENTRY);
   9.762 +
   9.763 +	} else {
   9.764 +		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
   9.765 +	}
   9.766 +
   9.767 +	return copy;
   9.768 +}
   9.769 +
   9.770 +/* "Destructor" for a VM area.
   9.771 + */
   9.772 +static void gntdev_vma_close(struct vm_area_struct *vma) {
   9.773 +	if (vma->vm_private_data) {
   9.774 +		kfree(vma->vm_private_data);
   9.775 +	}
   9.776 +}
   9.777 +
   9.778 +/* Called when an ioctl is made on the device.
   9.779 + */
   9.780 +static int gntdev_ioctl(struct inode *inode, struct file *flip,
   9.781 +			unsigned int cmd, unsigned long arg)
   9.782 +{
   9.783 +	int rc = 0;
   9.784 +	gntdev_file_private_data_t *private_data = 
   9.785 +		(gntdev_file_private_data_t *) flip->private_data;
   9.786 +
   9.787 +	switch (cmd) {
   9.788 +	case IOCTL_GNTDEV_MAP_GRANT_REF:
   9.789 +	{
   9.790 +		struct ioctl_gntdev_map_grant_ref op;
   9.791 +		down_write(&private_data->grants_sem);
   9.792 +		down_write(&private_data->free_list_sem);
   9.793 +
   9.794 +		if ((rc = copy_from_user(&op, (void __user *) arg, 
   9.795 +					 sizeof(op)))) {
   9.796 +			rc = -EFAULT;
   9.797 +			goto map_out;
   9.798 +		}
   9.799 +		if (unlikely(op.count <= 0)) {
   9.800 +			rc = -EINVAL;
   9.801 +			goto map_out;
   9.802 +		}
   9.803 +
   9.804 +		if (op.count == 1) {
   9.805 +			if ((rc = add_grant_reference(flip, &op.refs[0],
   9.806 +						      &op.index)) < 0) {
   9.807 +				printk(KERN_ERR "Adding grant reference "
   9.808 +				       "failed (%d).\n", rc);
   9.809 +				goto map_out;
   9.810 +			}
   9.811 +		} else {
   9.812 +			struct ioctl_gntdev_grant_ref *refs, *u;
   9.813 +			refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
   9.814 +			if (!refs) {
   9.815 +				rc = -ENOMEM;
   9.816 +				goto map_out;
   9.817 +			}
   9.818 +			u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
   9.819 +			if ((rc = copy_from_user(refs,
   9.820 +						 (void __user *)u,
   9.821 +						 sizeof(*refs) * op.count))) {
   9.822 +				printk(KERN_ERR "Copying refs from user failed"
   9.823 +				       " (%d).\n", rc);
    9.824 +				rc = -EFAULT;
   9.825 +				goto map_out;
   9.826 +			}
   9.827 +			if ((rc = find_contiguous_free_range(flip, op.count))
   9.828 +			    < 0) {
   9.829 +				printk(KERN_ERR "Finding contiguous range "
   9.830 +				       "failed (%d).\n", rc);
   9.831 +				kfree(refs);
   9.832 +				goto map_out;
   9.833 +			}
   9.834 +			op.index = rc << PAGE_SHIFT;
   9.835 +			if ((rc = add_grant_references(flip, op.count,
   9.836 +						       refs, rc))) {
   9.837 +				printk(KERN_ERR "Adding grant references "
   9.838 +				       "failed (%d).\n", rc);
   9.839 +				kfree(refs);
   9.840 +				goto map_out;
   9.841 +			}
   9.842 +			compress_free_list(flip);
   9.843 +			kfree(refs);
   9.844 +		}
   9.845 +		if ((rc = copy_to_user((void __user *) arg, 
   9.846 +				       &op, 
   9.847 +				       sizeof(op)))) {
   9.848 +			printk(KERN_ERR "Copying result back to user failed "
   9.849 +			       "(%d)\n", rc);
   9.850 +			rc = -EFAULT;
   9.851 +			goto map_out;
   9.852 +		}
   9.853 +	map_out:
   9.854 +		up_write(&private_data->grants_sem);
   9.855 +		up_write(&private_data->free_list_sem);
   9.856 +		return rc;
   9.857 +	}
   9.858 +	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
   9.859 +	{
   9.860 +		struct ioctl_gntdev_unmap_grant_ref op;
   9.861 +		int i, start_index;
   9.862 +
   9.863 +		down_write(&private_data->grants_sem);
   9.864 +		down_write(&private_data->free_list_sem);
   9.865 +
   9.866 +		if ((rc = copy_from_user(&op, 
   9.867 +					 (void __user *) arg, 
   9.868 +					 sizeof(op)))) {
   9.869 +			rc = -EFAULT;
   9.870 +			goto unmap_out;
   9.871 +		}
   9.872 +
   9.873 +		start_index = op.index >> PAGE_SHIFT;
   9.874 +
   9.875 +		/* First, check that all pages are in the NOT_YET_MAPPED
   9.876 +		 * state.
   9.877 +		 */
   9.878 +		for (i = 0; i < op.count; ++i) {
   9.879 +			if (unlikely
   9.880 +			    (private_data->grants[start_index + i].state
   9.881 +			     != GNTDEV_SLOT_NOT_YET_MAPPED)) {
   9.882 +				if (private_data->grants[start_index + i].state
   9.883 +				    == GNTDEV_SLOT_INVALID) {
   9.884 +					printk(KERN_ERR
   9.885 +					       "Tried to remove an invalid "
   9.886 +					       "grant at offset 0x%x.",
   9.887 +					       (start_index + i) 
   9.888 +					       << PAGE_SHIFT);
   9.889 +					rc = -EINVAL;
   9.890 +				} else {
   9.891 +					printk(KERN_ERR
   9.892 +					       "Tried to remove a grant which "
   9.893 +					       "is currently mmap()-ed at "
   9.894 +					       "offset 0x%x.",
   9.895 +					       (start_index + i) 
   9.896 +					       << PAGE_SHIFT);
   9.897 +					rc = -EBUSY;
   9.898 +				}
   9.899 +				goto unmap_out;
   9.900 +			}
   9.901 +		}
   9.902 +
   9.903 +		/* Unmap pages and add them to the free list.
   9.904 +		 */
   9.905 +		for (i = 0; i < op.count; ++i) {
   9.906 +			private_data->grants[start_index+i].state = 
   9.907 +				GNTDEV_SLOT_INVALID;
   9.908 +			private_data->grants[start_index+i].u.free_list_index =
   9.909 +				private_data->free_list_size;
   9.910 +			private_data->free_list[private_data->free_list_size] =
   9.911 +				start_index + i;
   9.912 +			++private_data->free_list_size;
   9.913 +		}
   9.914 +		compress_free_list(flip);
   9.915 +
   9.916 +	unmap_out:
   9.917 +		up_write(&private_data->grants_sem);
   9.918 +		up_write(&private_data->free_list_sem);
   9.919 +		return rc;
   9.920 +	}
   9.921 +	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
   9.922 +	{
   9.923 +		struct ioctl_gntdev_get_offset_for_vaddr op;
   9.924 +		struct vm_area_struct *vma;
   9.925 +		unsigned long vaddr;
   9.926 +
   9.927 +		if ((rc = copy_from_user(&op, 
   9.928 +					 (void __user *) arg, 
   9.929 +					 sizeof(op)))) {
   9.930 +			rc = -EFAULT;
   9.931 +			goto get_offset_out;
   9.932 +		}
   9.933 +		vaddr = (unsigned long)op.vaddr;
   9.934 +
   9.935 +		down_read(&current->mm->mmap_sem);		
   9.936 +		vma = find_vma(current->mm, vaddr);
   9.937 +		if (vma == NULL) {
   9.938 +			rc = -EFAULT;
   9.939 +			goto get_offset_unlock_out;
   9.940 +		}
   9.941 +		if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
   9.942 +			printk(KERN_ERR "The vaddr specified does not belong "
   9.943 +			       "to a gntdev instance: %#lx\n", vaddr);
   9.944 +			rc = -EFAULT;
   9.945 +			goto get_offset_unlock_out;
   9.946 +		}
   9.947 +		if (vma->vm_start != vaddr) {
   9.948 +			printk(KERN_ERR "The vaddr specified in an "
   9.949 +			       "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
   9.950 +			       "the start of the VM area. vma->vm_start = "
   9.951 +			       "%#lx; vaddr = %#lx\n",
   9.952 +			       vma->vm_start, vaddr);
   9.953 +			rc = -EFAULT;
   9.954 +			goto get_offset_unlock_out;
   9.955 +		}
   9.956 +		op.offset = vma->vm_pgoff << PAGE_SHIFT;
   9.957 +		op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   9.958 +		up_read(&current->mm->mmap_sem);
   9.959 +		if ((rc = copy_to_user((void __user *) arg, 
   9.960 +				       &op, 
   9.961 +				       sizeof(op)))) {
   9.962 +			rc = -EFAULT;
   9.963 +			goto get_offset_out;
   9.964 +		}
   9.965 +		goto get_offset_out;
   9.966 +	get_offset_unlock_out:
   9.967 +		up_read(&current->mm->mmap_sem);
   9.968 +	get_offset_out:
   9.969 +		return rc;
   9.970 +	}
   9.971 +	default:
   9.972 +		return -ENOIOCTLCMD;
   9.973 +	}
   9.974 +
   9.975 +	return 0;
   9.976 +}
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/util.c	Fri Mar 30 17:18:42 2007 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/util.c	Tue Apr 03 13:04:51 2007 -0600
    10.3 @@ -5,6 +5,26 @@
    10.4  #include <asm/uaccess.h>
    10.5  #include <xen/driver_util.h>
    10.6  
    10.7 +struct class *get_xen_class(void)
    10.8 +{
    10.9 +	static struct class *xen_class;
   10.10 +
   10.11 +	if (xen_class)
   10.12 +		return xen_class;
   10.13 +
   10.14 +	xen_class = class_create(THIS_MODULE, "xen");
   10.15 +	if (IS_ERR(xen_class)) {
   10.16 +		printk("Failed to create xen sysfs class.\n");
   10.17 +		xen_class = NULL;
   10.18 +	}
   10.19 +
   10.20 +	return xen_class;
   10.21 +}
   10.22 +EXPORT_SYMBOL_GPL(get_xen_class);
   10.23 +
   10.24 +/* Todo: merge ia64 ('auto-translate physmap') versions of these functions. */
   10.25 +#ifndef __ia64__
   10.26 +
   10.27  static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
   10.28  {
   10.29  	/* apply_to_page_range() does all the hard work. */
   10.30 @@ -46,3 +66,5 @@ void free_vm_area(struct vm_struct *area
   10.31  	kfree(area);
   10.32  }
   10.33  EXPORT_SYMBOL_GPL(free_vm_area);
   10.34 +
   10.35 +#endif /* !__ia64__ */
    11.1 --- a/linux-2.6-xen-sparse/include/linux/mm.h	Fri Mar 30 17:18:42 2007 -0600
    11.2 +++ b/linux-2.6-xen-sparse/include/linux/mm.h	Tue Apr 03 13:04:51 2007 -0600
    11.3 @@ -205,6 +205,10 @@ struct vm_operations_struct {
    11.4  	/* notification that a previously read-only page is about to become
    11.5  	 * writable, if an error is returned it will cause a SIGBUS */
    11.6  	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
    11.7 +	/* Area-specific function for clearing the PTE at @ptep. Returns the
    11.8 +	 * original value of @ptep. */
    11.9 +	pte_t (*zap_pte)(struct vm_area_struct *vma, 
   11.10 +			 unsigned long addr, pte_t *ptep, int is_fullmm);
   11.11  #ifdef CONFIG_NUMA
   11.12  	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
   11.13  	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
    12.1 --- a/linux-2.6-xen-sparse/include/xen/driver_util.h	Fri Mar 30 17:18:42 2007 -0600
    12.2 +++ b/linux-2.6-xen-sparse/include/xen/driver_util.h	Tue Apr 03 13:04:51 2007 -0600
    12.3 @@ -3,9 +3,12 @@
    12.4  #define __ASM_XEN_DRIVER_UTIL_H__
    12.5  
    12.6  #include <linux/vmalloc.h>
    12.7 +#include <linux/device.h>
    12.8  
    12.9  /* Allocate/destroy a 'vmalloc' VM area. */
   12.10  extern struct vm_struct *alloc_vm_area(unsigned long size);
   12.11  extern void free_vm_area(struct vm_struct *area);
   12.12  
   12.13 +extern struct class *get_xen_class(void);
   12.14 +
   12.15  #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6-xen-sparse/include/xen/public/gntdev.h	Tue Apr 03 13:04:51 2007 -0600
    13.3 @@ -0,0 +1,105 @@
    13.4 +/******************************************************************************
    13.5 + * gntdev.h
    13.6 + * 
    13.7 + * Interface to /dev/xen/gntdev.
    13.8 + * 
    13.9 + * Copyright (c) 2007, D G Murray
   13.10 + * 
   13.11 + * This program is free software; you can redistribute it and/or
   13.12 + * modify it under the terms of the GNU General Public License version 2
   13.13 + * as published by the Free Software Foundation; or, when distributed
   13.14 + * separately from the Linux kernel or incorporated into other
   13.15 + * software packages, subject to the following license:
   13.16 + * 
   13.17 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   13.18 + * of this source file (the "Software"), to deal in the Software without
   13.19 + * restriction, including without limitation the rights to use, copy, modify,
   13.20 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   13.21 + * and to permit persons to whom the Software is furnished to do so, subject to
   13.22 + * the following conditions:
   13.23 + * 
   13.24 + * The above copyright notice and this permission notice shall be included in
   13.25 + * all copies or substantial portions of the Software.
   13.26 + * 
   13.27 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   13.28 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   13.29 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   13.30 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   13.31 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   13.32 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   13.33 + * IN THE SOFTWARE.
   13.34 + */
   13.35 +
   13.36 +#ifndef __LINUX_PUBLIC_GNTDEV_H__
   13.37 +#define __LINUX_PUBLIC_GNTDEV_H__
   13.38 +
   13.39 +struct ioctl_gntdev_grant_ref {
   13.40 +	/* The domain ID of the grant to be mapped. */
   13.41 +	uint32_t domid;
   13.42 +	/* The grant reference of the grant to be mapped. */
   13.43 +	uint32_t ref;
   13.44 +};
   13.45 +
   13.46 +/*
   13.47 + * Inserts the grant references into the mapping table of an instance
   13.48 + * of gntdev. N.B. This does not perform the mapping, which is deferred
   13.49 + * until mmap() is called with @index as the offset.
   13.50 + */
   13.51 +#define IOCTL_GNTDEV_MAP_GRANT_REF \
   13.52 +_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
   13.53 +struct ioctl_gntdev_map_grant_ref {
   13.54 +	/* IN parameters */
   13.55 +	/* The number of grants to be mapped. */
   13.56 +	uint32_t count;
   13.57 +	uint32_t pad;
   13.58 +	/* OUT parameters */
   13.59 +	/* The offset to be used on a subsequent call to mmap(). */
   13.60 +	uint64_t index;
   13.61 +	/* Variable IN parameter. */
   13.62 +	/* Array of grant references, of size @count. */
   13.63 +	struct ioctl_gntdev_grant_ref refs[1];
   13.64 +};
   13.65 +
   13.66 +/*
    13.67 + * Removes the grant references from the mapping table of an instance
    13.68 + * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
   13.69 + * before this ioctl is called, or an error will result.
   13.70 + */
   13.71 +#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
   13.72 +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
   13.73 +struct ioctl_gntdev_unmap_grant_ref {
   13.74 +	/* IN parameters */
    13.75 +	/* The offset that was returned by the corresponding map operation. */
   13.76 +	uint64_t index;
   13.77 +	/* The number of pages to be unmapped. */
   13.78 +	uint32_t count;
   13.79 +	uint32_t pad;
   13.80 +};
   13.81 +
   13.82 +/*
   13.83 + * Returns the offset in the driver's address space that corresponds
   13.84 + * to @vaddr. This can be used to perform a munmap(), followed by an
   13.85 + * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
   13.86 + * the caller. The number of pages that were allocated at the same time as
   13.87 + * @vaddr is returned in @count.
   13.88 + *
   13.89 + * N.B. Where more than one page has been mapped into a contiguous range, the
   13.90 + *      supplied @vaddr must correspond to the start of the range; otherwise
   13.91 + *      an error will result. It is only possible to munmap() the entire
   13.92 + *      contiguously-allocated range at once, and not any subrange thereof.
   13.93 + */
   13.94 +#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
   13.95 +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
   13.96 +struct ioctl_gntdev_get_offset_for_vaddr {
   13.97 +	/* IN parameters */
   13.98 +	/* The virtual address of the first mapped page in a range. */
   13.99 +	uint64_t vaddr;
  13.100 +	/* OUT parameters */
  13.101 +	/* The offset that was used in the initial mmap() operation. */
  13.102 +	uint64_t offset;
  13.103 +	/* The number of pages mapped in the VM area that begins at @vaddr. */
  13.104 +	uint32_t count;
  13.105 +	uint32_t pad;
  13.106 +};
  13.107 +
  13.108 +#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
    14.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Fri Mar 30 17:18:42 2007 -0600
    14.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Tue Apr 03 13:04:51 2007 -0600
    14.3 @@ -659,8 +659,12 @@ static unsigned long zap_pte_range(struc
    14.4  				     page->index > details->last_index))
    14.5  					continue;
    14.6  			}
    14.7 -			ptent = ptep_get_and_clear_full(mm, addr, pte,
    14.8 -							tlb->fullmm);
    14.9 +			if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
   14.10 +				ptent = vma->vm_ops->zap_pte(vma, addr, pte,
   14.11 +							     tlb->fullmm);
   14.12 +			else
   14.13 +				ptent = ptep_get_and_clear_full(mm, addr, pte,
   14.14 +								tlb->fullmm);
   14.15  			tlb_remove_tlb_entry(tlb, pte, addr);
   14.16  			if (unlikely(!page))
   14.17  				continue;
   14.18 @@ -755,6 +759,7 @@ static unsigned long unmap_page_range(st
   14.19  		details = NULL;
   14.20  
   14.21  	BUG_ON(addr >= end);
   14.22 +
   14.23  	tlb_start_vma(tlb, vma);
   14.24  	pgd = pgd_offset(vma->vm_mm, addr);
   14.25  	do {
    15.1 --- a/tools/blktap/drivers/qcow2raw.c	Fri Mar 30 17:18:42 2007 -0600
    15.2 +++ b/tools/blktap/drivers/qcow2raw.c	Tue Apr 03 13:04:51 2007 -0600
    15.3 @@ -51,7 +51,6 @@
    15.4  #define BLOCK_PROCESSSZ 4096
    15.5  
    15.6  static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0; 
    15.7 -static int read_complete = 0, write_complete = 0;
    15.8  static int returned_read_events = 0, returned_write_events = 0;
    15.9  static int submit_events = 0;
   15.10  static uint32_t read_idx = 0, write_idx = 0;
   15.11 @@ -109,8 +108,6 @@ static int send_write_responses(struct d
   15.12  	written += BLOCK_PROCESSSZ;
   15.13  	returned_write_events++;
   15.14  	write_idx = idx;
   15.15 -	if (complete && (returned_write_events == submit_events)) 
   15.16 -		write_complete = 1;
   15.17  
   15.18  	debug_output(written, dd->td_state->size << 9);
   15.19  	free(private);
   15.20 @@ -126,8 +123,6 @@ static int send_read_responses(struct di
   15.21  	
   15.22  	returned_read_events++;
   15.23  	read_idx = idx;
   15.24 -	if (complete && (returned_read_events == submit_events)) 
   15.25 -		read_complete = 1;
   15.26  	
   15.27  	ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, 
   15.28  					send_write_responses, idx, private);
   15.29 @@ -136,7 +131,7 @@ static int send_read_responses(struct di
   15.30  		return 0;
   15.31  	}
   15.32  
   15.33 -	if ( (complete && returned_read_events == submit_events) || 
   15.34 +	if ( (returned_read_events == submit_events) || 
   15.35  	     (returned_read_events % 10 == 0) ) {
   15.36  		ddaio.drv->td_submit(&ddaio);
   15.37  	}
   15.38 @@ -299,6 +294,7 @@ int main(int argc, char *argv[])
   15.39  			}
   15.40  		
   15.41  			/*Attempt to read 4k sized blocks*/
   15.42 +			submit_events++;
   15.43  			ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
   15.44  							BLOCK_PROCESSSZ>>9, buf, 
   15.45  							send_read_responses, i>>9, buf);
   15.46 @@ -309,7 +305,6 @@ int main(int argc, char *argv[])
   15.47  				exit(-1);
   15.48  			} else {
   15.49  				i += BLOCK_PROCESSSZ;
   15.50 -				submit_events++;
   15.51  			}
   15.52  
   15.53  			if (i >= ddqcow.td_state->size<<9) {
    16.1 --- a/tools/examples/xmexample.hvm	Fri Mar 30 17:18:42 2007 -0600
    16.2 +++ b/tools/examples/xmexample.hvm	Tue Apr 03 13:04:51 2007 -0600
    16.3 @@ -180,6 +180,10 @@ serial='pty'
    16.4  
    16.5  
    16.6  #-----------------------------------------------------------------------------
    16.7 +#    set the real time clock offset in seconds [default=0 i.e. same as dom0]
    16.8 +#rtc_timeoffset=3600
    16.9 +
   16.10 +#-----------------------------------------------------------------------------
   16.11  #    start in full screen
   16.12  #full-screen=1   
   16.13  
    17.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Fri Mar 30 17:18:42 2007 -0600
    17.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Tue Apr 03 13:04:51 2007 -0600
    17.3 @@ -74,6 +74,8 @@ int vcpus = 1;
    17.4  
    17.5  int xc_handle;
    17.6  
    17.7 +long time_offset = 0;
    17.8 +
    17.9  shared_iopage_t *shared_page = NULL;
   17.10  
   17.11  #define BUFFER_IO_MAX_DELAY  100
   17.12 @@ -439,6 +441,34 @@ void cpu_ioreq_xor(CPUState *env, ioreq_
   17.13      req->data = tmp1;
   17.14  }
   17.15  
   17.16 +void timeoffset_get()
   17.17 +{
   17.18 +    char *p;
   17.19 +
   17.20 +    p = xenstore_vm_read(domid, "rtc/timeoffset", NULL);
   17.21 +    if (!p)
   17.22 +	return;
   17.23 +
   17.24 +    if (sscanf(p, "%ld", &time_offset) == 1)
   17.25 +	fprintf(logfile, "Time offset set %ld\n", time_offset);
   17.26 +    else
   17.27 +	time_offset = 0;
   17.28 +
   17.29 +    xc_domain_set_time_offset(xc_handle, domid, time_offset);
   17.30 +
   17.31 +    free(p);
   17.32 +}
   17.33 +
   17.34 +void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
   17.35 +{
   17.36 +    char b[64];
   17.37 +
   17.38 +    time_offset += (ulong)req->data;
   17.39 +
   17.40 +    sprintf(b, "%ld", time_offset);
   17.41 +    xenstore_vm_write(domid, "rtc/timeoffset", b);
   17.42 +}
   17.43 +
   17.44  void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
   17.45  {
   17.46      unsigned long tmp1;
   17.47 @@ -478,6 +508,9 @@ void __handle_ioreq(CPUState *env, ioreq
   17.48      case IOREQ_TYPE_XCHG:
   17.49          cpu_ioreq_xchg(env, req);
   17.50          break;
   17.51 +    case IOREQ_TYPE_TIMEOFFSET:
   17.52 +	cpu_ioreq_timeoffset(env, req);
   17.53 +	break;
   17.54      default:
   17.55          hw_error("Invalid ioreq type 0x%x\n", req->type);
   17.56      }
    18.1 --- a/tools/ioemu/vl.c	Fri Mar 30 17:18:42 2007 -0600
    18.2 +++ b/tools/ioemu/vl.c	Tue Apr 03 13:04:51 2007 -0600
    18.3 @@ -6670,6 +6670,9 @@ int main(int argc, char **argv)
    18.4      }
    18.5      free(page_array);
    18.6  #endif
    18.7 +
    18.8 +    timeoffset_get();
    18.9 +
   18.10  #else  /* !CONFIG_DM */
   18.11  
   18.12      phys_ram_base = qemu_vmalloc(phys_ram_size);
    19.1 --- a/tools/ioemu/vl.h	Fri Mar 30 17:18:42 2007 -0600
    19.2 +++ b/tools/ioemu/vl.h	Tue Apr 03 13:04:51 2007 -0600
    19.3 @@ -1276,6 +1276,12 @@ int xenstore_unsubscribe_from_hotplug_st
    19.4                                               const char *inst,
    19.5                                               const char *token);
    19.6  
    19.7 +int xenstore_vm_write(int domid, char *key, char *val);
    19.8 +char *xenstore_vm_read(int domid, char *key, int *len);
    19.9 +
   19.10 +/* helper2.c */
   19.11 +extern long time_offset;
   19.12 +void timeoffset_get(void);
   19.13  
   19.14  /* xen_platform.c */
   19.15  void pci_xen_platform_init(PCIBus *bus);
    20.1 --- a/tools/ioemu/xenstore.c	Fri Mar 30 17:18:42 2007 -0600
    20.2 +++ b/tools/ioemu/xenstore.c	Tue Apr 03 13:04:51 2007 -0600
    20.3 @@ -567,3 +567,72 @@ int xenstore_unsubscribe_from_hotplug_st
    20.4  
    20.5      return rc;
    20.6  }
    20.7 +
    20.8 +char *xenstore_vm_read(int domid, char *key, int *len)
    20.9 +{
   20.10 +    char *buf = NULL, *path = NULL, *value = NULL;
   20.11 +
   20.12 +    if (xsh == NULL)
   20.13 +	goto out;
   20.14 +
   20.15 +    path = xs_get_domain_path(xsh, domid);
   20.16 +    if (path == NULL) {
   20.17 +	fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
   20.18 +	goto out;
   20.19 +    }
   20.20 +
   20.21 +    pasprintf(&buf, "%s/vm", path);
   20.22 +    free(path);
   20.23 +    path = xs_read(xsh, XBT_NULL, buf, NULL);
   20.24 +    if (path == NULL) {
   20.25 +	fprintf(logfile, "xs_read(%s): read error\n", buf);
   20.26 +	goto out;
   20.27 +    }
   20.28 +
   20.29 +    pasprintf(&buf, "%s/%s", path, key);
   20.30 +    value = xs_read(xsh, XBT_NULL, buf, len);
   20.31 +    if (value == NULL) {
   20.32 +	fprintf(logfile, "xs_read(%s): read error\n", buf);
   20.33 +	goto out;
   20.34 +    }
   20.35 +
   20.36 + out:
   20.37 +    free(path);
   20.38 +    free(buf);
   20.39 +    return value;
   20.40 +}
   20.41 +
   20.42 +int xenstore_vm_write(int domid, char *key, char *value)
   20.43 +{
   20.44 +    char *buf = NULL, *path = NULL;
   20.45 +    int rc = -1;
   20.46 +
   20.47 +    if (xsh == NULL)
   20.48 +	goto out;
   20.49 +
   20.50 +    path = xs_get_domain_path(xsh, domid);
   20.51 +    if (path == NULL) {
   20.52 +	fprintf(logfile, "xs_get_domain_path(%d): error\n");
   20.53 +	goto out;
   20.54 +    }
   20.55 +
   20.56 +    pasprintf(&buf, "%s/vm", path);
   20.57 +    free(path);
   20.58 +    path = xs_read(xsh, XBT_NULL, buf, NULL);
   20.59 +    if (path == NULL) {
   20.60 +	fprintf(logfile, "xs_read(%s): read error\n", buf);
   20.61 +	goto out;
   20.62 +    }
   20.63 +
   20.64 +    pasprintf(&buf, "%s/%s", path, key);
   20.65 +    rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
   20.66 +    if (rc) {
   20.67 +	fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
   20.68 +	goto out;
   20.69 +    }
   20.70 +
   20.71 + out:
   20.72 +    free(path);
   20.73 +    free(buf);
   20.74 +    return rc;
   20.75 +}
    21.1 --- a/tools/libxc/ia64/xc_ia64_linux_restore.c	Fri Mar 30 17:18:42 2007 -0600
    21.2 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c	Tue Apr 03 13:04:51 2007 -0600
    21.3 @@ -14,8 +14,14 @@
    21.4  
    21.5  #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
    21.6  
    21.7 -/* total number of pages used by the current guest */
    21.8 -static unsigned long max_pfn;
    21.9 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
   21.10 +static unsigned long p2m_size;
   21.11 +
   21.12 +/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
   21.13 +static unsigned long nr_pfns;
   21.14 +
   21.15 +/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
   21.16 +static unsigned long max_nr_pfns;
   21.17  
   21.18  static ssize_t
   21.19  read_exact(int fd, void *buf, size_t count)
   21.20 @@ -57,9 +63,9 @@ read_page(int xc_handle, int io_fd, uint
   21.21  
   21.22  int
   21.23  xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
   21.24 -                 unsigned long nr_pfns, unsigned int store_evtchn,
   21.25 -                 unsigned long *store_mfn, unsigned int console_evtchn,
   21.26 -                 unsigned long *console_mfn)
   21.27 +                 unsigned long p2msize, unsigned long maxnrpfns,
   21.28 +                 unsigned int store_evtchn, unsigned long *store_mfn,
   21.29 +                 unsigned int console_evtchn, unsigned long *console_mfn)
   21.30  {
   21.31      DECLARE_DOMCTL;
   21.32      int rc = 1, i;
   21.33 @@ -79,10 +85,13 @@ xc_linux_restore(int xc_handle, int io_f
   21.34      /* A temporary mapping of the guest's start_info page. */
   21.35      start_info_t *start_info;
   21.36  
   21.37 -    max_pfn = nr_pfns;
   21.38 +    p2m_size = p2msize;
   21.39 +    max_nr_pfns = maxnrpfns;
   21.40  
   21.41 -    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
   21.42 +    /* For info only */
   21.43 +    nr_pfns = 0;
   21.44  
   21.45 +    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
   21.46  
   21.47      if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
   21.48  	ERROR("Error when reading version");
   21.49 @@ -99,29 +108,29 @@ xc_linux_restore(int xc_handle, int io_f
   21.50          return 1;
   21.51      }
   21.52  
   21.53 -    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
   21.54 +    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
   21.55          errno = ENOMEM;
   21.56          goto out;
   21.57      }
   21.58  
   21.59      /* Get pages.  */
   21.60 -    page_array = malloc(max_pfn * sizeof(unsigned long));
   21.61 +    page_array = malloc(p2m_size * sizeof(unsigned long));
   21.62      if (page_array == NULL) {
   21.63          ERROR("Could not allocate memory");
   21.64          goto out;
   21.65      }
   21.66  
   21.67 -    for ( i = 0; i < max_pfn; i++ )
   21.68 +    for ( i = 0; i < p2m_size; i++ )
   21.69          page_array[i] = i;
   21.70  
   21.71 -    if ( xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
   21.72 +    if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
   21.73                                             0, 0, page_array) )
   21.74      {
   21.75          ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
   21.76 -              PFN_TO_KB(max_pfn), dom);
   21.77 +              PFN_TO_KB(p2m_size), dom);
   21.78          goto out;
   21.79      }
   21.80 -    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(max_pfn));
   21.81 +    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
   21.82  
   21.83      if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) {
   21.84          ERROR("read: domain setup");
   21.85 @@ -131,9 +140,9 @@ xc_linux_restore(int xc_handle, int io_f
   21.86      /* Build firmware (will be overwritten).  */
   21.87      domctl.domain = (domid_t)dom;
   21.88      domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
   21.89 -    domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT)
   21.90 +    domctl.u.arch_setup.bp = ((p2m_size - 3) << PAGE_SHIFT)
   21.91                             + sizeof (start_info_t);
   21.92 -    domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT;
   21.93 +    domctl.u.arch_setup.maxmem = (p2m_size - 3) << PAGE_SHIFT;
   21.94      
   21.95      domctl.cmd = XEN_DOMCTL_arch_setup;
   21.96      if (xc_domctl(xc_handle, &domctl))
   21.97 @@ -158,8 +167,6 @@ xc_linux_restore(int xc_handle, int io_f
   21.98  	if (gmfn == INVALID_MFN)
   21.99  		break;
  21.100  
  21.101 -       //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", gmfn, max_pfn, pfn);
  21.102 -
  21.103  	if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
  21.104  		goto out;
  21.105      }
  21.106 @@ -281,7 +288,7 @@ xc_linux_restore(int xc_handle, int io_f
  21.107      /* Uncanonicalise the suspend-record frame number and poke resume rec. */
  21.108      start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  21.109                                        PROT_READ | PROT_WRITE, gmfn);
  21.110 -    start_info->nr_pages = max_pfn;
  21.111 +    start_info->nr_pages = p2m_size;
  21.112      start_info->shared_info = shared_info_frame << PAGE_SHIFT;
  21.113      start_info->flags = 0;
  21.114      *store_mfn = start_info->store_mfn;
    22.1 --- a/tools/libxc/xc_core.c	Fri Mar 30 17:18:42 2007 -0600
    22.2 +++ b/tools/libxc/xc_core.c	Tue Apr 03 13:04:51 2007 -0600
    22.3 @@ -312,7 +312,7 @@ xc_domain_dumpcore_via_callback(int xc_h
    22.4  
    22.5      int auto_translated_physmap;
    22.6      xen_pfn_t *p2m = NULL;
    22.7 -    unsigned long max_pfn = 0;
    22.8 +    unsigned long p2m_size = 0;
    22.9      struct xen_dumpcore_p2m *p2m_array = NULL;
   22.10  
   22.11      uint64_t *pfn_array = NULL;
   22.12 @@ -396,7 +396,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   22.13          }
   22.14  
   22.15          sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
   22.16 -                                   &p2m, &max_pfn);
   22.17 +                                   &p2m, &p2m_size);
   22.18          if ( sts != 0 )
   22.19              goto out;
   22.20      }
    23.1 --- a/tools/libxc/xc_core_x86.c	Fri Mar 30 17:18:42 2007 -0600
    23.2 +++ b/tools/libxc/xc_core_x86.c	Tue Apr 03 13:04:51 2007 -0600
    23.3 @@ -38,7 +38,7 @@ xc_core_arch_memory_map_get(int xc_handl
    23.4                              xc_core_memory_map_t **mapp,
    23.5                              unsigned int *nr_entries)
    23.6  {
    23.7 -    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
    23.8 +    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
    23.9      xc_core_memory_map_t *map;
   23.10  
   23.11      map = malloc(sizeof(*map));
   23.12 @@ -49,7 +49,7 @@ xc_core_arch_memory_map_get(int xc_handl
   23.13      }
   23.14  
   23.15      map->addr = 0;
   23.16 -    map->size = max_pfn << PAGE_SHIFT;
   23.17 +    map->size = p2m_size << PAGE_SHIFT;
   23.18  
   23.19      *mapp = map;
   23.20      *nr_entries = 1;
   23.21 @@ -65,13 +65,13 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
   23.22      xen_pfn_t *live_p2m_frame_list_list = NULL;
   23.23      xen_pfn_t *live_p2m_frame_list = NULL;
   23.24      uint32_t dom = info->domid;
   23.25 -    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
   23.26 +    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
   23.27      int ret = -1;
   23.28      int err;
   23.29  
   23.30 -    if ( max_pfn < info->nr_pages  )
   23.31 +    if ( p2m_size < info->nr_pages  )
   23.32      {
   23.33 -        ERROR("max_pfn < nr_pages -1 (%lx < %lx", max_pfn, info->nr_pages - 1);
    23.34 +        ERROR("p2m_size < nr_pages (%lx < %lx)", p2m_size, info->nr_pages);
   23.35          goto out;
   23.36      }
   23.37  
   23.38 @@ -106,7 +106,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
   23.39          goto out;
   23.40      }
   23.41  
   23.42 -    *pfnp = max_pfn;
   23.43 +    *pfnp = p2m_size;
   23.44  
   23.45      ret = 0;
   23.46  
    24.1 --- a/tools/libxc/xc_hvm_restore.c	Fri Mar 30 17:18:42 2007 -0600
    24.2 +++ b/tools/libxc/xc_hvm_restore.c	Tue Apr 03 13:04:51 2007 -0600
    24.3 @@ -95,7 +95,7 @@ int xc_hvm_restore(int xc_handle, int io
    24.4      unsigned long pfn_array_size = max_pfn + 1;
    24.5  
    24.6      /* Number of pages of memory the guest has.  *Not* the same as max_pfn. */
    24.7 -    unsigned long nr_pages = max_pfn + 1;
    24.8 +    unsigned long nr_pages = max_pfn;
    24.9      /* MMIO hole doesn't contain RAM */
   24.10      if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT ) 
   24.11          nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; 
   24.12 @@ -270,7 +270,6 @@ int xc_hvm_restore(int xc_handle, int io
   24.13  
   24.14      }/*while 1*/
   24.15      
   24.16 -/*    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
   24.17      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
   24.18      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
   24.19  
   24.20 @@ -279,13 +278,22 @@ int xc_hvm_restore(int xc_handle, int io
   24.21      else
   24.22          shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
   24.23  
   24.24 +    /* Ensure we clear these pages */
   24.25 +    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
   24.26 +         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
   24.27 +         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) {
   24.28 +        rc = -1;
   24.29 +        goto out;
   24.30 +    }
   24.31 +
   24.32      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
   24.33      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
   24.34      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
   24.35  
    24.36 +    /* Calculate the store_mfn; a wrong value causes a hang in introduceDomain. */
   24.37      *store_mfn = (v_end >> PAGE_SHIFT) - 2;
   24.38 -    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", *store_mfn, v_end);
   24.39 +    DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n", 
   24.40 +            *store_mfn, v_end);
   24.41  
   24.42      if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
   24.43          ERROR("error read nr vcpu !\n");
    25.1 --- a/tools/libxc/xc_hvm_save.c	Fri Mar 30 17:18:42 2007 -0600
    25.2 +++ b/tools/libxc/xc_hvm_save.c	Tue Apr 03 13:04:51 2007 -0600
    25.3 @@ -332,10 +332,10 @@ int xc_hvm_save(int xc_handle, int io_fd
    25.4  
    25.5      unsigned long total_sent    = 0;
    25.6  
    25.7 -    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, live=%d, debug=%d.\n",
    25.8 -            dom, max_iters, max_factor, flags,
    25.9 +    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
   25.10 +            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
   25.11              live, debug);
   25.12 -
   25.13 +    
   25.14      /* If no explicit control parameters given, use defaults */
   25.15      if(!max_iters)
   25.16          max_iters = DEF_MAX_ITERS;
   25.17 @@ -382,7 +382,6 @@ int xc_hvm_save(int xc_handle, int io_fd
   25.18          ERROR("HVM: Could not read magic PFN parameters");
   25.19          goto out;
   25.20      }
   25.21 -
   25.22      DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
   25.23              "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); 
   25.24  
    26.1 --- a/tools/libxc/xc_linux.c	Fri Mar 30 17:18:42 2007 -0600
    26.2 +++ b/tools/libxc/xc_linux.c	Tue Apr 03 13:04:51 2007 -0600
    26.3 @@ -3,6 +3,9 @@
    26.4   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
    26.5   * Use is subject to license terms.
    26.6   *
    26.7 + * xc_gnttab functions:
    26.8 + * Copyright (c) 2007, D G Murray <Derek.Murray@cl.cam.ac.uk>
    26.9 + *
   26.10   * This program is free software; you can redistribute it and/or
   26.11   * modify it under the terms of the GNU General Public License as
   26.12   * published by the Free Software Foundation, version 2 of the
   26.13 @@ -13,6 +16,7 @@
   26.14  
   26.15  #include <xen/memory.h>
   26.16  #include <xen/sys/evtchn.h>
   26.17 +#include <xen/sys/gntdev.h>
   26.18  #include <unistd.h>
   26.19  #include <fcntl.h>
   26.20  
   26.21 @@ -363,6 +367,158 @@ void discard_file_cache(int fd, int flus
   26.22      errno = saved_errno;
   26.23  }
   26.24  
   26.25 +#define GNTTAB_DEV_NAME "/dev/xen/gntdev"
   26.26 +
   26.27 +int xc_gnttab_open(void)
   26.28 +{
   26.29 +    struct stat st;
   26.30 +    int fd;
   26.31 +    int devnum;
   26.32 +    
   26.33 +    devnum = xc_find_device_number("gntdev");
   26.34 +    
   26.35 +    /* Make sure any existing device file links to correct device. */
   26.36 +    if ( (lstat(GNTTAB_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
   26.37 +         (st.st_rdev != devnum) )
   26.38 +        (void)unlink(GNTTAB_DEV_NAME);
   26.39 +    
   26.40 +reopen:
   26.41 +    if ( (fd = open(GNTTAB_DEV_NAME, O_RDWR)) == -1 )
   26.42 +    {
   26.43 +        if ( (errno == ENOENT) &&
   26.44 +             ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
   26.45 +             (mknod(GNTTAB_DEV_NAME, S_IFCHR|0600, devnum) == 0) )
   26.46 +            goto reopen;
   26.47 +        
   26.48 +        PERROR("Could not open grant table interface");
   26.49 +        return -1;
   26.50 +    }
   26.51 +    
   26.52 +    return fd;
   26.53 +}
   26.54 +
   26.55 +int xc_gnttab_close(int xcg_handle)
   26.56 +{
   26.57 +    return close(xcg_handle);
   26.58 +}
   26.59 +
   26.60 +void *xc_gnttab_map_grant_ref(int xcg_handle,
   26.61 +                              uint32_t domid,
   26.62 +                              uint32_t ref,
   26.63 +                              int prot)
   26.64 +{
   26.65 +    struct ioctl_gntdev_map_grant_ref map;
   26.66 +    void *addr;
   26.67 +    
   26.68 +    map.count = 1;
   26.69 +    map.refs[0].domid = domid;
   26.70 +    map.refs[0].ref   = ref;
   26.71 +
   26.72 +    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, &map) )
   26.73 +        return NULL;
   26.74 +    
   26.75 +    addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, xcg_handle, map.index);
   26.76 +    if ( addr == MAP_FAILED )
   26.77 +    {
   26.78 +        int saved_errno = errno;
   26.79 +        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
   26.80 +        /* Unmap the driver slots used to store the grant information. */
   26.81 +        unmap_grant.index = map.index;
   26.82 +        unmap_grant.count = 1;
   26.83 +        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
   26.84 +        errno = saved_errno;
   26.85 +        return NULL;
   26.86 +    }
   26.87 +    
   26.88 +    return addr;
   26.89 +}
   26.90 +
   26.91 +void *xc_gnttab_map_grant_refs(int xcg_handle,
   26.92 +                               uint32_t count,
   26.93 +                               uint32_t *domids,
   26.94 +                               uint32_t *refs,
   26.95 +                               int prot)
   26.96 +{
   26.97 +    struct ioctl_gntdev_map_grant_ref *map;
   26.98 +    void *addr = NULL;
   26.99 +    int i;
  26.100 +    
  26.101 +    map = malloc(sizeof(*map) +
  26.102 +                 (count-1) * sizeof(struct ioctl_gntdev_map_grant_ref));
  26.103 +    if ( map == NULL )
  26.104 +        return NULL;
  26.105 +
  26.106 +    for ( i = 0; i < count; i++ )
  26.107 +    {
  26.108 +        map->refs[i].domid = domids[i];
  26.109 +        map->refs[i].ref   = refs[i];
  26.110 +    }
  26.111 +
  26.112 +    map->count = count;
  26.113 +    
   26.114 +    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, map) )
  26.115 +        goto out;
  26.116 +
  26.117 +    addr = mmap(NULL, PAGE_SIZE * count, prot, MAP_SHARED, xcg_handle,
  26.118 +                map->index);
  26.119 +    if ( addr == MAP_FAILED )
  26.120 +    {
  26.121 +        int saved_errno = errno;
  26.122 +        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
  26.123 +        /* Unmap the driver slots used to store the grant information. */
  26.124 +        unmap_grant.index = map->index;
  26.125 +        unmap_grant.count = count;
  26.126 +        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
  26.127 +        errno = saved_errno;
  26.128 +        addr = NULL;
  26.129 +    }
  26.130 +
  26.131 + out:
  26.132 +    free(map);
  26.133 +    return addr;
  26.134 +}
  26.135 +
  26.136 +int xc_gnttab_munmap(int xcg_handle,
  26.137 +                     void *start_address,
  26.138 +                     uint32_t count)
  26.139 +{
  26.140 +    struct ioctl_gntdev_get_offset_for_vaddr get_offset;
  26.141 +    struct ioctl_gntdev_unmap_grant_ref unmap_grant;
  26.142 +    int rc;
  26.143 +
  26.144 +    if ( start_address == NULL )
  26.145 +    {
  26.146 +        errno = EINVAL;
  26.147 +        return -1;
  26.148 +    }
  26.149 +
  26.150 +    /* First, it is necessary to get the offset which was initially used to
  26.151 +     * mmap() the pages.
  26.152 +     */
  26.153 +    get_offset.vaddr = (unsigned long)start_address;
  26.154 +    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, 
  26.155 +                     &get_offset)) )
  26.156 +        return rc;
  26.157 +
  26.158 +    if ( get_offset.count != count )
  26.159 +    {
  26.160 +        errno = EINVAL;
  26.161 +        return -1;
  26.162 +    }
  26.163 +
  26.164 +    /* Next, unmap the memory. */
  26.165 +    if ( (rc = munmap(start_address, count * getpagesize())) )
  26.166 +        return rc;
  26.167 +    
  26.168 +    /* Finally, unmap the driver slots used to store the grant information. */
  26.169 +    unmap_grant.index = get_offset.offset;
  26.170 +    unmap_grant.count = count;
  26.171 +    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant)) )
  26.172 +        return rc;
  26.173 +
  26.174 +    return 0;
  26.175 +}
  26.176 +
  26.177  /*
  26.178   * Local variables:
  26.179   * mode: C
    27.1 --- a/tools/libxc/xc_linux_restore.c	Fri Mar 30 17:18:42 2007 -0600
    27.2 +++ b/tools/libxc/xc_linux_restore.c	Tue Apr 03 13:04:51 2007 -0600
    27.3 @@ -22,8 +22,14 @@ static unsigned long hvirt_start;
    27.4  /* #levels of page tables used by the current guest */
    27.5  static unsigned int pt_levels;
    27.6  
    27.7 -/* total number of pages used by the current guest */
    27.8 -static unsigned long max_pfn;
    27.9 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
   27.10 +static unsigned long p2m_size;
   27.11 +
   27.12 +/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
   27.13 +static unsigned long nr_pfns;
   27.14 +
   27.15 +/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
   27.16 +static unsigned long max_nr_pfns;
   27.17  
   27.18  /* Live mapping of the table mapping each PFN to its current MFN. */
   27.19  static xen_pfn_t *live_p2m = NULL;
   27.20 @@ -34,7 +40,6 @@ static xen_pfn_t *p2m = NULL;
   27.21  /* A table of P2M mappings in the current region */
   27.22  static xen_pfn_t *p2m_batch = NULL;
   27.23  
   27.24 -
   27.25  static ssize_t
   27.26  read_exact(int fd, void *buf, size_t count)
   27.27  {
   27.28 @@ -85,11 +90,11 @@ static int uncanonicalize_pagetable(int 
   27.29          
   27.30          pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
   27.31          
   27.32 -        if(pfn >= max_pfn) {
   27.33 +        if(pfn >= p2m_size) {
   27.34              /* This "page table page" is probably not one; bail. */
   27.35              ERROR("Frame number in type %lu page table is out of range: "
   27.36 -                  "i=%d pfn=0x%lx max_pfn=%lu",
   27.37 -                  type >> 28, i, pfn, max_pfn);
   27.38 +                  "i=%d pfn=0x%lx p2m_size=%lu",
   27.39 +                  type >> 28, i, pfn, p2m_size);
   27.40              return 0;
   27.41          }
   27.42          
   27.43 @@ -138,8 +143,9 @@ static int uncanonicalize_pagetable(int 
   27.44      return 1;
   27.45  }
   27.46  
   27.47 -int xc_linux_restore(int xc_handle, int io_fd,
   27.48 -                     uint32_t dom, unsigned long nr_pfns,
   27.49 +
   27.50 +int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
   27.51 +                     unsigned long p2msize, unsigned long maxnrpfns,
   27.52                       unsigned int store_evtchn, unsigned long *store_mfn,
   27.53                       unsigned int console_evtchn, unsigned long *console_mfn)
   27.54  {
   27.55 @@ -191,9 +197,13 @@ int xc_linux_restore(int xc_handle, int 
   27.56      unsigned int max_vcpu_id = 0;
   27.57      int new_ctxt_format = 0;
   27.58  
   27.59 -    max_pfn = nr_pfns;
   27.60 +    p2m_size    = p2msize;
   27.61 +    max_nr_pfns = maxnrpfns;
   27.62  
   27.63 -    DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);
   27.64 +    /* For info only */
   27.65 +    nr_pfns = 0;
   27.66 +
   27.67 +    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
   27.68  
   27.69      /*
   27.70       * XXX For now, 32bit dom0's can only save/restore 32bit domUs
   27.71 @@ -294,8 +304,8 @@ int xc_linux_restore(int xc_handle, int 
   27.72      }
   27.73  
   27.74      /* We want zeroed memory so use calloc rather than malloc. */
   27.75 -    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
   27.76 -    pfn_type   = calloc(max_pfn, sizeof(unsigned long));
   27.77 +    p2m        = calloc(p2m_size, sizeof(xen_pfn_t));
   27.78 +    pfn_type   = calloc(p2m_size, sizeof(unsigned long));
   27.79      region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   27.80      p2m_batch  = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
   27.81  
   27.82 @@ -325,13 +335,13 @@ int xc_linux_restore(int xc_handle, int 
   27.83      }
   27.84      shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
   27.85  
   27.86 -    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
   27.87 +    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
   27.88          errno = ENOMEM;
   27.89          goto out;
   27.90      }
   27.91  
   27.92      /* Mark all PFNs as invalid; we allocate on demand */
   27.93 -    for ( pfn = 0; pfn < max_pfn; pfn++ )
   27.94 +    for ( pfn = 0; pfn < p2m_size; pfn++ )
   27.95          p2m[pfn] = INVALID_P2M_ENTRY;
   27.96  
   27.97      if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
   27.98 @@ -352,7 +362,7 @@ int xc_linux_restore(int xc_handle, int 
   27.99  
  27.100          int j, nr_mfns = 0; 
  27.101  
  27.102 -        this_pc = (n * 100) / max_pfn;
  27.103 +        this_pc = (n * 100) / p2m_size;
  27.104          if ( (this_pc - prev_pc) >= 5 )
  27.105          {
  27.106              PPRINTF("\b\b\b\b%3d%%", this_pc);
  27.107 @@ -436,6 +446,7 @@ int xc_linux_restore(int xc_handle, int 
  27.108                  if (p2m[pfn] == INVALID_P2M_ENTRY) {
  27.109                      /* We just allocated a new mfn above; update p2m */
  27.110                      p2m[pfn] = p2m_batch[nr_mfns++]; 
  27.111 +                    nr_pfns++; 
  27.112                  }
  27.113  
  27.114                  /* setup region_mfn[] for batch map */
  27.115 @@ -465,7 +476,7 @@ int xc_linux_restore(int xc_handle, int 
  27.116                  /* a bogus/unmapped page: skip it */
  27.117                  continue;
  27.118  
   27.119 +            if ( pfn >= p2m_size )
  27.120 +            if ( pfn > p2m_size )
  27.121              {
  27.122                  ERROR("pfn out of range");
  27.123                  goto out;
  27.124 @@ -518,7 +529,7 @@ int xc_linux_restore(int xc_handle, int 
  27.125              else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
  27.126              {
  27.127                  ERROR("Bogus page type %lx page table is out of range: "
  27.128 -                    "i=%d max_pfn=%lu", pagetype, i, max_pfn);
  27.129 +                    "i=%d p2m_size=%lu", pagetype, i, p2m_size);
  27.130                  goto out;
  27.131  
  27.132              }
  27.133 @@ -598,7 +609,7 @@ int xc_linux_restore(int xc_handle, int 
  27.134          int j, k;
  27.135          
  27.136          /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
  27.137 -        for ( i = 0; i < max_pfn; i++ )
  27.138 +        for ( i = 0; i < p2m_size; i++ )
  27.139          {
  27.140              if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
  27.141                    XEN_DOMCTL_PFINFO_L3TAB) &&
  27.142 @@ -646,7 +657,7 @@ int xc_linux_restore(int xc_handle, int 
  27.143          /* Second pass: find all L1TABs and uncanonicalize them */
  27.144          j = 0;
  27.145  
  27.146 -        for ( i = 0; i < max_pfn; i++ )
  27.147 +        for ( i = 0; i < p2m_size; i++ )
  27.148          {
  27.149              if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
  27.150                    XEN_DOMCTL_PFINFO_L1TAB) )
  27.151 @@ -655,7 +666,7 @@ int xc_linux_restore(int xc_handle, int 
  27.152                  j++;
  27.153              }
  27.154  
  27.155 -            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
  27.156 +            if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
  27.157  
  27.158                  if (!(region_base = xc_map_foreign_batch(
  27.159                            xc_handle, dom, PROT_READ | PROT_WRITE,
  27.160 @@ -689,7 +700,7 @@ int xc_linux_restore(int xc_handle, int 
  27.161       * will barf when doing the type-checking.
  27.162       */
  27.163      nr_pins = 0;
  27.164 -    for ( i = 0; i < max_pfn; i++ )
  27.165 +    for ( i = 0; i < p2m_size; i++ )
  27.166      {
  27.167          if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
  27.168              continue;
  27.169 @@ -736,7 +747,7 @@ int xc_linux_restore(int xc_handle, int 
  27.170      }
  27.171  
  27.172      DPRINTF("\b\b\b\b100%%\n");
  27.173 -    DPRINTF("Memory reloaded.\n");
  27.174 +    DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns);
  27.175  
  27.176      /* Get the list of PFNs that are not in the psuedo-phys map */
  27.177      {
  27.178 @@ -808,7 +819,7 @@ int xc_linux_restore(int xc_handle, int 
  27.179               * resume record.
  27.180               */
  27.181              pfn = ctxt.user_regs.edx;
  27.182 -            if ((pfn >= max_pfn) ||
  27.183 +            if ((pfn >= p2m_size) ||
  27.184                  (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
  27.185                  ERROR("Suspend record frame number is bad");
  27.186                  goto out;
  27.187 @@ -816,7 +827,7 @@ int xc_linux_restore(int xc_handle, int 
  27.188              ctxt.user_regs.edx = mfn = p2m[pfn];
  27.189              start_info = xc_map_foreign_range(
  27.190                  xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
  27.191 -            start_info->nr_pages = max_pfn;
  27.192 +            start_info->nr_pages = p2m_size;
  27.193              start_info->shared_info = shared_info_frame << PAGE_SHIFT;
  27.194              start_info->flags = 0;
  27.195              *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
  27.196 @@ -835,7 +846,7 @@ int xc_linux_restore(int xc_handle, int 
  27.197  
  27.198          for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
  27.199              pfn = ctxt.gdt_frames[j];
  27.200 -            if ((pfn >= max_pfn) ||
  27.201 +            if ((pfn >= p2m_size) ||
  27.202                  (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
  27.203                  ERROR("GDT frame number is bad");
  27.204                  goto out;
  27.205 @@ -846,16 +857,16 @@ int xc_linux_restore(int xc_handle, int 
  27.206          /* Uncanonicalise the page table base pointer. */
  27.207          pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
  27.208  
  27.209 -        if (pfn >= max_pfn) {
  27.210 -            ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
  27.211 -                  pfn, max_pfn, pfn_type[pfn]);
  27.212 +        if (pfn >= p2m_size) {
  27.213 +            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
  27.214 +                  pfn, p2m_size, pfn_type[pfn]);
  27.215              goto out;
  27.216          }
  27.217  
  27.218          if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
  27.219               ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
  27.220              ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
  27.221 -                  pfn, max_pfn, pfn_type[pfn],
  27.222 +                  pfn, p2m_size, pfn_type[pfn],
  27.223                    (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
  27.224              goto out;
  27.225          }
  27.226 @@ -867,16 +878,16 @@ int xc_linux_restore(int xc_handle, int 
  27.227          {
  27.228              pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
  27.229  
  27.230 -            if (pfn >= max_pfn) {
  27.231 -                ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
  27.232 -                      pfn, max_pfn, pfn_type[pfn]);
  27.233 +            if (pfn >= p2m_size) {
  27.234 +                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
  27.235 +                      pfn, p2m_size, pfn_type[pfn]);
  27.236                  goto out;
  27.237              }
  27.238  
  27.239              if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
  27.240                   ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
  27.241                  ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
  27.242 -                      pfn, max_pfn, pfn_type[pfn],
  27.243 +                      pfn, p2m_size, pfn_type[pfn],
  27.244                        (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
  27.245                  goto out;
  27.246              }
  27.247 @@ -915,7 +926,7 @@ int xc_linux_restore(int xc_handle, int 
  27.248      /* Uncanonicalise the pfn-to-mfn table frame-number list. */
  27.249      for (i = 0; i < P2M_FL_ENTRIES; i++) {
  27.250          pfn = p2m_frame_list[i];
  27.251 -        if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
  27.252 +        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
  27.253              ERROR("PFN-to-MFN frame number is bad");
  27.254              goto out;
  27.255          }
  27.256 @@ -930,8 +941,8 @@ int xc_linux_restore(int xc_handle, int 
  27.257          goto out;
  27.258      }
  27.259  
  27.260 -    memcpy(live_p2m, p2m, P2M_SIZE);
  27.261 -    munmap(live_p2m, P2M_SIZE);
  27.262 +    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  27.263 +    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  27.264  
  27.265      DPRINTF("Domain ready to be built.\n");
  27.266  
    28.1 --- a/tools/libxc/xc_linux_save.c	Fri Mar 30 17:18:42 2007 -0600
    28.2 +++ b/tools/libxc/xc_linux_save.c	Tue Apr 03 13:04:51 2007 -0600
    28.3 @@ -25,7 +25,7 @@
    28.4  **
    28.5  */
    28.6  #define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
    28.7 -#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
    28.8 +#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
    28.9  
   28.10  
   28.11  /* max mfn of the whole machine */
   28.12 @@ -37,8 +37,8 @@ static unsigned long hvirt_start;
   28.13  /* #levels of page tables used by the current guest */
   28.14  static unsigned int pt_levels;
   28.15  
   28.16 -/* total number of pages used by the current guest */
   28.17 -static unsigned long max_pfn;
   28.18 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
   28.19 +static unsigned long p2m_size;
   28.20  
   28.21  /* Live mapping of the table mapping each PFN to its current MFN. */
   28.22  static xen_pfn_t *live_p2m = NULL;
   28.23 @@ -57,7 +57,7 @@ static unsigned long m2p_mfn0;
   28.24   */
   28.25  #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
   28.26  (((_mfn) < (max_mfn)) &&                        \
   28.27 - ((mfn_to_pfn(_mfn) < (max_pfn)) &&               \
   28.28 + ((mfn_to_pfn(_mfn) < (p2m_size)) &&               \
   28.29    (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
   28.30  
   28.31  
   28.32 @@ -79,7 +79,7 @@ static unsigned long m2p_mfn0;
   28.33  */
   28.34  
   28.35  #define BITS_PER_LONG (sizeof(unsigned long) * 8)
   28.36 -#define BITMAP_SIZE   ((max_pfn + BITS_PER_LONG - 1) / 8)
   28.37 +#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
   28.38  
   28.39  #define BITMAP_ENTRY(_nr,_bmap) \
   28.40     ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   28.41 @@ -343,7 +343,7 @@ static int print_stats(int xc_handle, ui
   28.42  }
   28.43  
   28.44  
   28.45 -static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
   28.46 +static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
   28.47                            unsigned long *arr, int runs)
   28.48  {
   28.49      long long start, now;
   28.50 @@ -356,7 +356,7 @@ static int analysis_phase(int xc_handle,
   28.51          int i;
   28.52  
   28.53          xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
   28.54 -                          arr, max_pfn, NULL, 0, NULL);
   28.55 +                          arr, p2m_size, NULL, 0, NULL);
   28.56          DPRINTF("#Flush\n");
   28.57          for ( i = 0; i < 40; i++ ) {
   28.58              usleep(50000);
   28.59 @@ -682,7 +682,7 @@ int xc_linux_save(int xc_handle, int io_
   28.60      /* base of the region in which domain memory is mapped */
   28.61      unsigned char *region_base = NULL;
   28.62  
   28.63 -    /* power of 2 order of max_pfn */
   28.64 +    /* power of 2 order of p2m_size */
   28.65      int order_nr;
   28.66  
   28.67      /* bitmap of pages:
   28.68 @@ -730,7 +730,7 @@ int xc_linux_save(int xc_handle, int io_
   28.69          goto out;
   28.70      }
   28.71  
   28.72 -    max_pfn = live_shinfo->arch.max_pfn;
   28.73 +    p2m_size = live_shinfo->arch.max_pfn;
   28.74  
   28.75      live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
   28.76                                                     live_shinfo);
   28.77 @@ -777,7 +777,7 @@ int xc_linux_save(int xc_handle, int io_
   28.78      memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
   28.79  
   28.80      /* Canonicalise the pfn-to-mfn table frame-number list. */
   28.81 -    for (i = 0; i < max_pfn; i += fpp) {
   28.82 +    for (i = 0; i < p2m_size; i += fpp) {
   28.83          if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
   28.84              ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
   28.85              ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
   28.86 @@ -813,12 +813,12 @@ int xc_linux_save(int xc_handle, int io_
   28.87      }
   28.88  
   28.89      /* pretend we sent all the pages last iteration */
   28.90 -    sent_last_iter = max_pfn;
   28.91 +    sent_last_iter = p2m_size;
   28.92  
   28.93  
   28.94 -    /* calculate the power of 2 order of max_pfn, e.g.
   28.95 +    /* calculate the power of 2 order of p2m_size, e.g.
   28.96         15->4 16->4 17->5 */
   28.97 -    for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
   28.98 +    for (i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++)
   28.99          continue;
  28.100  
  28.101      /* Setup to_send / to_fix and to_skip bitmaps */
  28.102 @@ -844,7 +844,7 @@ int xc_linux_save(int xc_handle, int io_
  28.103          return 1;
  28.104      }
  28.105  
  28.106 -    analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
  28.107 +    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
  28.108  
  28.109      /* We want zeroed memory so use calloc rather than malloc. */
  28.110      pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
  28.111 @@ -867,7 +867,7 @@ int xc_linux_save(int xc_handle, int io_
  28.112      {
  28.113          int err=0;
  28.114          unsigned long mfn;
  28.115 -        for (i = 0; i < max_pfn; i++) {
  28.116 +        for (i = 0; i < p2m_size; i++) {
  28.117  
  28.118              mfn = live_p2m[i];
  28.119              if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) {
  28.120 @@ -882,8 +882,8 @@ int xc_linux_save(int xc_handle, int io_
  28.121  
  28.122      /* Start writing out the saved-domain record. */
  28.123  
  28.124 -    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
  28.125 -        ERROR("write: max_pfn");
  28.126 +    if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
  28.127 +        ERROR("write: p2m_size");
  28.128          goto out;
  28.129      }
  28.130  
  28.131 @@ -929,9 +929,9 @@ int xc_linux_save(int xc_handle, int io_
  28.132  
  28.133          DPRINTF("Saving memory pages: iter %d   0%%", iter);
  28.134  
  28.135 -        while( N < max_pfn ){
  28.136 +        while( N < p2m_size ){
  28.137  
  28.138 -            unsigned int this_pc = (N * 100) / max_pfn;
  28.139 +            unsigned int this_pc = (N * 100) / p2m_size;
  28.140  
  28.141              if ((this_pc - prev_pc) >= 5) {
  28.142                  DPRINTF("\b\b\b\b%3d%%", this_pc);
  28.143 @@ -942,7 +942,7 @@ int xc_linux_save(int xc_handle, int io_
  28.144                 but this is fast enough for the moment. */
  28.145              if (!last_iter && xc_shadow_control(
  28.146                      xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
  28.147 -                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
  28.148 +                    to_skip, p2m_size, NULL, 0, NULL) != p2m_size) {
  28.149                  ERROR("Error peeking shadow bitmap");
  28.150                  goto out;
  28.151              }
  28.152 @@ -950,9 +950,9 @@ int xc_linux_save(int xc_handle, int io_
  28.153  
  28.154              /* load pfn_type[] with the mfn of all the pages we're doing in
  28.155                 this batch. */
  28.156 -            for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
  28.157 +            for (batch = 0; batch < MAX_BATCH_SIZE && N < p2m_size ; N++) {
  28.158  
  28.159 -                int n = permute(N, max_pfn, order_nr);
  28.160 +                int n = permute(N, p2m_size, order_nr);
  28.161  
  28.162                  if (debug) {
  28.163                      DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
  28.164 @@ -1123,7 +1123,7 @@ int xc_linux_save(int xc_handle, int io_
  28.165              print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
  28.166  
  28.167              DPRINTF("Total pages sent= %ld (%.2fx)\n",
  28.168 -                    total_sent, ((float)total_sent)/max_pfn );
  28.169 +                    total_sent, ((float)total_sent)/p2m_size );
  28.170              DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
  28.171          }
  28.172  
  28.173 @@ -1150,7 +1150,7 @@ int xc_linux_save(int xc_handle, int io_
  28.174              if (((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
  28.175                  (iter >= max_iters) ||
  28.176                  (sent_this_iter+skip_this_iter < 50) ||
  28.177 -                (total_sent > max_pfn*max_factor)) {
  28.178 +                (total_sent > p2m_size*max_factor)) {
  28.179                  DPRINTF("Start last iteration\n");
  28.180                  last_iter = 1;
  28.181  
  28.182 @@ -1168,7 +1168,7 @@ int xc_linux_save(int xc_handle, int io_
  28.183  
  28.184              if (xc_shadow_control(xc_handle, dom, 
  28.185                                    XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
  28.186 -                                  max_pfn, NULL, 0, &stats) != max_pfn) {
  28.187 +                                  p2m_size, NULL, 0, &stats) != p2m_size) {
  28.188                  ERROR("Error flushing shadow PT");
  28.189                  goto out;
  28.190              }
  28.191 @@ -1220,7 +1220,7 @@ int xc_linux_save(int xc_handle, int io_
  28.192          unsigned int i,j;
  28.193          unsigned long pfntab[1024];
  28.194  
  28.195 -        for (i = 0, j = 0; i < max_pfn; i++) {
  28.196 +        for (i = 0, j = 0; i < p2m_size; i++) {
  28.197              if (!is_mapped(live_p2m[i]))
  28.198                  j++;
  28.199          }
  28.200 @@ -1230,13 +1230,13 @@ int xc_linux_save(int xc_handle, int io_
  28.201              goto out;
  28.202          }
  28.203  
  28.204 -        for (i = 0, j = 0; i < max_pfn; ) {
  28.205 +        for (i = 0, j = 0; i < p2m_size; ) {
  28.206  
  28.207              if (!is_mapped(live_p2m[i]))
  28.208                  pfntab[j++] = i;
  28.209  
  28.210              i++;
  28.211 -            if (j == 1024 || i == max_pfn) {
  28.212 +            if (j == 1024 || i == p2m_size) {
  28.213                  if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
  28.214                      ERROR("Error when writing to state file (6b) (errno %d)",
  28.215                            errno);
  28.216 @@ -1333,7 +1333,7 @@ int xc_linux_save(int xc_handle, int io_
  28.217          munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
  28.218  
  28.219      if (live_p2m)
  28.220 -        munmap(live_p2m, P2M_SIZE);
  28.221 +        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  28.222  
  28.223      if (live_m2p)
  28.224          munmap(live_m2p, M2P_SIZE(max_mfn));
    29.1 --- a/tools/libxc/xc_resume.c	Fri Mar 30 17:18:42 2007 -0600
    29.2 +++ b/tools/libxc/xc_resume.c	Tue Apr 03 13:04:51 2007 -0600
    29.3 @@ -46,7 +46,7 @@ static int xc_domain_resume_any(int xc_h
    29.4      xc_dominfo_t info;
    29.5      int i, rc = -1;
    29.6  #if defined(__i386__) || defined(__x86_64__)
    29.7 -    unsigned long mfn, max_pfn = 0;
    29.8 +    unsigned long mfn, p2m_size = 0;
    29.9      vcpu_guest_context_t ctxt;
   29.10      start_info_t *start_info;
   29.11      shared_info_t *shinfo = NULL;
   29.12 @@ -74,7 +74,7 @@ static int xc_domain_resume_any(int xc_h
   29.13          goto out;
   29.14      }
   29.15  
   29.16 -    max_pfn = shinfo->arch.max_pfn;
   29.17 +    p2m_size = shinfo->arch.max_pfn;
   29.18  
   29.19      p2m_frame_list_list =
   29.20          xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ,
    30.1 --- a/tools/libxc/xenctrl.h	Fri Mar 30 17:18:42 2007 -0600
    30.2 +++ b/tools/libxc/xenctrl.h	Tue Apr 03 13:04:51 2007 -0600
    30.3 @@ -4,6 +4,9 @@
    30.4   * A library for low-level access to the Xen control interfaces.
    30.5   *
    30.6   * Copyright (c) 2003-2004, K A Fraser.
    30.7 + *
    30.8 + * xc_gnttab functions:
    30.9 + * Copyright (c) 2007, D G Murray <Derek.Murray@cl.cam.ac.uk>
   30.10   */
   30.11  
   30.12  #ifndef XENCTRL_H
   30.13 @@ -740,6 +743,62 @@ evtchn_port_t xc_evtchn_pending(int xce_
   30.14   */
   30.15  int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
   30.16  
   30.17 +/**************************
   30.18 + * GRANT TABLE OPERATIONS *
   30.19 + **************************/
   30.20 +
   30.21 +/*
   30.22 + * Return a handle to the grant table driver, or -1 on failure, in which case
   30.23 + * errno will be set appropriately.
   30.24 + */
   30.25 +int xc_gnttab_open(void);
   30.26 +
   30.27 +/*
   30.28 + * Close a handle previously allocated with xc_gnttab_open().
   30.29 + */
   30.30 +int xc_gnttab_close(int xcg_handle);
   30.31 +
   30.32 +/*
   30.33 + * Memory maps a grant reference from one domain to a local address range.
   30.34 + * Mappings should be unmapped with xc_gnttab_munmap.  Returns NULL on failure.
   30.35 + *
   30.36 + * @parm xcg_handle a handle on an open grant table interface
   30.37 + * @parm domid the domain to map memory from
   30.38 + * @parm ref the grant reference ID to map
   30.39 + * @parm prot same flag as in mmap()
   30.40 + */
   30.41 +void *xc_gnttab_map_grant_ref(int xcg_handle,
   30.42 +                              uint32_t domid,
   30.43 +                              uint32_t ref,
   30.44 +                              int prot);
   30.45 +
   30.46 +/**
   30.47 + * Memory maps one or more grant references from one or more domains to a
   30.48 + * contiguous local address range. Mappings should be unmapped with
   30.49 + * xc_gnttab_munmap.  Returns NULL on failure.
   30.50 + *
   30.51 + * @parm xcg_handle a handle on an open grant table interface
   30.52 + * @parm count the number of grant references to be mapped
   30.53 + * @parm domids an array of @count domain IDs by which the corresponding @refs
   30.54 + *              were granted
   30.55 + * @parm refs an array of @count grant references to be mapped
   30.56 + * @parm prot same flag as in mmap()
   30.57 + */
   30.58 +void *xc_gnttab_map_grant_refs(int xcg_handle,
   30.59 +                               uint32_t count,
   30.60 +                               uint32_t *domids,
   30.61 +                               uint32_t *refs,
   30.62 +                               int prot);
   30.63 +
   30.64 +/*
   30.65 + * Unmaps the @count pages starting at @start_address, which were mapped by a
   30.66 + * call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Returns zero
   30.67 + * on success, otherwise sets errno and returns non-zero.
   30.68 + */
   30.69 +int xc_gnttab_munmap(int xcg_handle,
   30.70 +                     void *start_address,
   30.71 +                     uint32_t count);
   30.72 +
   30.73  int xc_hvm_set_pci_intx_level(
   30.74      int xc_handle, domid_t dom,
   30.75      uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
    31.1 --- a/tools/libxc/xenguest.h	Fri Mar 30 17:18:42 2007 -0600
    31.2 +++ b/tools/libxc/xenguest.h	Tue Apr 03 13:04:51 2007 -0600
    31.3 @@ -43,15 +43,16 @@ int xc_hvm_save(int xc_handle, int io_fd
    31.4   * @parm xc_handle a handle to an open hypervisor interface
    31.5   * @parm fd the file descriptor to restore a domain from
    31.6   * @parm dom the id of the domain
    31.7 - * @parm nr_pfns the number of pages
    31.8 + * @parm p2m_size number of pages the guest has (i.e. number entries in P2M)
    31.9 + * @parm max_nr_pfns domains maximum real memory allocation, in pages
   31.10   * @parm store_evtchn the store event channel for this domain to use
   31.11   * @parm store_mfn returned with the mfn of the store page
   31.12   * @return 0 on success, -1 on failure
   31.13   */
   31.14  int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
   31.15 -                     unsigned long nr_pfns, unsigned int store_evtchn,
   31.16 -                     unsigned long *store_mfn, unsigned int console_evtchn,
   31.17 -                     unsigned long *console_mfn);
   31.18 +                     unsigned long p2m_size, unsigned long max_nr_pfns,
   31.19 +                     unsigned int store_evtchn, unsigned long *store_mfn,
   31.20 +                     unsigned int console_evtchn, unsigned long *console_mfn);
   31.21  
   31.22  /**
   31.23   * This function will restore a saved hvm domain running unmodified guest.
    32.1 --- a/tools/libxc/xg_private.h	Fri Mar 30 17:18:42 2007 -0600
    32.2 +++ b/tools/libxc/xg_private.h	Tue Apr 03 13:04:51 2007 -0600
    32.3 @@ -148,17 +148,16 @@ typedef l4_pgentry_64_t l4_pgentry_t;
    32.4  
    32.5  #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
    32.6  
    32.7 -/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
    32.8 -#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
    32.9 -
   32.10  /* Number of xen_pfn_t in a page */
   32.11  #define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
   32.12  
   32.13 +/* XXX SMH: following 3 skanky macros rely on variable p2m_size being set */
   32.14 +
   32.15  /* Number of entries in the pfn_to_mfn_frame_list_list */
   32.16 -#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
   32.17 +#define P2M_FLL_ENTRIES (((p2m_size)+(fpp*fpp)-1)/(fpp*fpp))
   32.18  
   32.19  /* Number of entries in the pfn_to_mfn_frame_list */
   32.20 -#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
   32.21 +#define P2M_FL_ENTRIES  (((p2m_size)+fpp-1)/fpp)
   32.22  
   32.23  /* Size in bytes of the pfn_to_mfn_frame_list     */
   32.24  #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
    33.1 --- a/tools/python/xen/lowlevel/scf/scf.c	Fri Mar 30 17:18:42 2007 -0600
    33.2 +++ b/tools/python/xen/lowlevel/scf/scf.c	Tue Apr 03 13:04:51 2007 -0600
    33.3 @@ -26,7 +26,7 @@
    33.4  #include <libscf.h>
    33.5  #include <stdio.h>
    33.6  
    33.7 -#define	XEND_FMRI "svc:/system/xen/xend:default"
    33.8 +#define	XEND_FMRI "svc:/system/xctl/xend:default"
    33.9  #define	XEND_PG "config"
   33.10  
   33.11  static PyObject *scf_exc;
    34.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Fri Mar 30 17:18:42 2007 -0600
    34.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Apr 03 13:04:51 2007 -0600
    34.3 @@ -187,6 +187,7 @@ def restore(xd, fd, dominfo = None, paus
    34.4      assert console_port
    34.5  
    34.6      nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
    34.7 +    max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4 
    34.8  
    34.9      # if hvm, pass mem size to calculate the store_mfn
   34.10      image_cfg = dominfo.info.get('image', {})
   34.11 @@ -203,17 +204,17 @@ def restore(xd, fd, dominfo = None, paus
   34.12      try:
   34.13          l = read_exact(fd, sizeof_unsigned_long,
   34.14                         "not a valid guest state file: pfn count read")
   34.15 -        max_pfn = unpack("L", l)[0]    # native sizeof long
   34.16 +        p2m_size = unpack("L", l)[0]    # native sizeof long
   34.17  
   34.18 -        if max_pfn > 16*1024*1024:     # XXX 
   34.19 +        if p2m_size > 16*1024*1024:     # XXX 
   34.20              raise XendError(
   34.21                  "not a valid guest state file: pfn count out of range")
   34.22  
   34.23          shadow = dominfo.info['shadow_memory']
   34.24          log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
   34.25 -                  "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
   34.26 +                  "p2m_size=0x%x.", dominfo.info['shadow_memory'],
   34.27                    dominfo.info['memory_static_max'],
   34.28 -                  dominfo.info['memory_static_min'], nr_pfns)
   34.29 +                  dominfo.info['memory_static_min'], p2m_size)
   34.30  
   34.31          balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
   34.32  
   34.33 @@ -221,7 +222,7 @@ def restore(xd, fd, dominfo = None, paus
   34.34          dominfo.info['shadow_memory'] = shadow_cur
   34.35  
   34.36          cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
   34.37 -                        fd, dominfo.getDomid(), max_pfn,
   34.38 +                        fd, dominfo.getDomid(), p2m_size, max_nr_pfns, 
   34.39                          store_port, console_port, int(is_hvm), pae, apic])
   34.40          log.debug("[xc_restore]: %s", string.join(cmd))
   34.41  
    35.1 --- a/tools/python/xen/xend/XendConfig.py	Fri Mar 30 17:18:42 2007 -0600
    35.2 +++ b/tools/python/xen/xend/XendConfig.py	Tue Apr 03 13:04:51 2007 -0600
    35.3 @@ -118,7 +118,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
    35.4  # Platform configuration keys.
    35.5  XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
    35.6                          'fda', 'fdb', 'keymap', 'isa', 'localtime',
    35.7 -                        'nographic', 'pae', 'serial', 'sdl',
    35.8 +                        'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
    35.9                          'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
   35.10                          'vncconsole', 'vncdisplay', 'vnclisten',
   35.11                          'vncpasswd', 'vncunused', 'xauthority']
   35.12 @@ -203,6 +203,7 @@ LEGACY_CFG_TYPES = {
   35.13      'on_xend_stop':  str,
   35.14      'on_xend_start': str,
   35.15      'online_vcpus':  int,
   35.16 +    'rtc/timeoffset': str,
   35.17  }
   35.18  
   35.19  # Values that should be stored in xenstore's /vm/<uuid> that is used
    36.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Mar 30 17:18:42 2007 -0600
    36.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Tue Apr 03 13:04:51 2007 -0600
    36.3 @@ -859,7 +859,8 @@ class XendDomainInfo:
    36.4          # Check whether values in the configuration have
    36.5          # changed in Xenstore.
    36.6          
    36.7 -        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash']
    36.8 +        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash',
    36.9 +                  'rtc/timeoffset']
   36.10          
   36.11          vm_details = self._readVMDetails([(k,XendConfig.LEGACY_CFG_TYPES[k])
   36.12                                             for k in cfg_vm])
   36.13 @@ -888,6 +889,11 @@ class XendDomainInfo:
   36.14              self.info.update_with_image_sxp(sxp.from_string(image_sxp))
   36.15              changed = True
   36.16  
   36.17 +        # Check if the rtc offset has changes
   36.18 +        if vm_details.get("rtc/timeoffset", 0) != self.info["platform"].get("rtc_timeoffset", 0):
   36.19 +            self.info["platform"]["rtc_timeoffset"] = vm_details.get("rtc/timeoffset", 0)
   36.20 +            changed = True
   36.21 + 
   36.22          if changed:
   36.23              # Update the domain section of the store, as this contains some
   36.24              # parameters derived from the VM configuration.
    37.1 --- a/tools/python/xen/xend/balloon.py	Fri Mar 30 17:18:42 2007 -0600
    37.2 +++ b/tools/python/xen/xend/balloon.py	Tue Apr 03 13:04:51 2007 -0600
    37.3 @@ -25,9 +25,7 @@ import XendDomain
    37.4  import XendOptions
    37.5  from XendLogging import log
    37.6  from XendError import VmError
    37.7 -
    37.8 -
    37.9 -PROC_XEN_BALLOON = '/proc/xen/balloon'
   37.10 +import osdep
   37.11  
   37.12  RETRY_LIMIT = 20
   37.13  RETRY_LIMIT_INCR = 5
   37.14 @@ -51,19 +49,7 @@ def _get_proc_balloon(label):
   37.15      """Returns the value for the named label.  Returns None if the label was
   37.16         not found or the value was non-numeric."""
   37.17  
   37.18 -    f = file(PROC_XEN_BALLOON, 'r')
   37.19 -    try:
   37.20 -        for line in f:
   37.21 -            keyvalue = line.split(':')
   37.22 -            if keyvalue[0] == label:
   37.23 -                values = keyvalue[1].split()
   37.24 -                if values[0].isdigit():
   37.25 -                    return int(values[0])
   37.26 -                else:
   37.27 -                    return None
   37.28 -        return None
   37.29 -    finally:
   37.30 -        f.close()
   37.31 +    return osdep.lookup_balloon_stat(label)
   37.32  
   37.33  def get_dom0_current_alloc():
   37.34      """Returns the current memory allocation (in KiB) of dom0."""
    38.1 --- a/tools/python/xen/xend/image.py	Fri Mar 30 17:18:42 2007 -0600
    38.2 +++ b/tools/python/xen/xend/image.py	Tue Apr 03 13:04:51 2007 -0600
    38.3 @@ -256,9 +256,12 @@ class HVMImageHandler(ImageHandler):
    38.4          self.xauthority = vmConfig['platform'].get('xauthority')
    38.5          self.vncconsole = vmConfig['platform'].get('vncconsole')
    38.6  
    38.7 +        rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
    38.8 +
    38.9          self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
   38.10                          ("image/device-model", self.device_model),
   38.11                          ("image/display", self.display))
   38.12 +        self.vm.storeVm(("rtc/timeoffset", rtc_timeoffset))
   38.13  
   38.14          self.pid = None
   38.15  
    39.1 --- a/tools/python/xen/xend/osdep.py	Fri Mar 30 17:18:42 2007 -0600
    39.2 +++ b/tools/python/xen/xend/osdep.py	Tue Apr 03 13:04:51 2007 -0600
    39.3 @@ -41,6 +41,55 @@ import os
    39.4      "SunOS": "vif-vnic"
    39.5  }
    39.6  
    39.7 +def _linux_balloon_stat(label):
    39.8 +    """Returns the value for the named label, or None if an error occurs."""
    39.9 +
   39.10 +    PROC_XEN_BALLOON = '/proc/xen/balloon'
   39.11 +    f = file(PROC_XEN_BALLOON, 'r')
   39.12 +    try:
   39.13 +        for line in f:
   39.14 +            keyvalue = line.split(':')
   39.15 +            if keyvalue[0] == label:
   39.16 +                values = keyvalue[1].split()
   39.17 +                if values[0].isdigit():
   39.18 +                    return int(values[0])
   39.19 +                else:
   39.20 +                    return None
   39.21 +        return None
   39.22 +    finally:
   39.23 +        f.close()
   39.24 +
   39.25 +def _solaris_balloon_stat(label):
   39.26 +    """Returns the value for the named label, or None if an error occurs."""
   39.27 +
   39.28 +    import fcntl
   39.29 +    import array
   39.30 +    DEV_XEN_BALLOON = '/dev/xen/balloon'
   39.31 +    BLN_IOCTL_CURRENT = 0x4201
   39.32 +    BLN_IOCTL_TARGET = 0x4202
   39.33 +    BLN_IOCTL_LOW = 0x4203
   39.34 +    BLN_IOCTL_HIGH = 0x4204
   39.35 +    BLN_IOCTL_LIMIT = 0x4205
   39.36 +    label_to_ioctl = {	'Current allocation'	: BLN_IOCTL_CURRENT,
   39.37 +			'Requested target'	: BLN_IOCTL_TARGET,
   39.38 +			'Low-mem balloon'	: BLN_IOCTL_LOW,
   39.39 +			'High-mem balloon'	: BLN_IOCTL_HIGH,
   39.40 +			'Xen hard limit'	: BLN_IOCTL_LIMIT }
   39.41 +
   39.42 +    f = file(DEV_XEN_BALLOON, 'r')
   39.43 +    try:
   39.44 +        values = array.array('L', [0])
   39.45 +        if fcntl.ioctl(f.fileno(), label_to_ioctl[label], values, 1) == 0:
   39.46 +            return values[0]
   39.47 +        else:
   39.48 +            return None
   39.49 +    finally:
   39.50 +        f.close()
   39.51 +
   39.52 +_balloon_stat = {
   39.53 +    "SunOS": _solaris_balloon_stat
   39.54 +}
   39.55 +
   39.56  def _get(var, default=None):
   39.57      return var.get(os.uname()[0], default)
   39.58  
   39.59 @@ -49,3 +98,4 @@ xend_autorestart = _get(_xend_autorestar
   39.60  pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
   39.61  netback_type = _get(_netback_type, "netfront")
   39.62  vif_script = _get(_vif_script, "vif-bridge")
   39.63 +lookup_balloon_stat = _get(_balloon_stat, _linux_balloon_stat)
    40.1 --- a/tools/python/xen/xend/server/SrvServer.py	Fri Mar 30 17:18:42 2007 -0600
    40.2 +++ b/tools/python/xen/xend/server/SrvServer.py	Tue Apr 03 13:04:51 2007 -0600
    40.3 @@ -212,8 +212,8 @@ def _loadConfig(servers, root, reload):
    40.4                      if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]:
    40.5                          auth_method = server_cfg[1]
    40.6  
    40.7 -                if len(server_cfg) > 2:
    40.8 -                    hosts_allowed = server_cfg[2] or None
    40.9 +                if len(server_cfg) > 2 and len(server_cfg[2]):
   40.10 +                    hosts_allowed = map(re.compile, server_cfg[2].split(' '))
   40.11  
   40.12                  if len(server_cfg) > 4:
   40.13                      # SSL key and cert file
    41.1 --- a/tools/python/xen/xend/server/relocate.py	Fri Mar 30 17:18:42 2007 -0600
    41.2 +++ b/tools/python/xen/xend/server/relocate.py	Tue Apr 03 13:04:51 2007 -0600
    41.3 @@ -106,8 +106,12 @@ class RelocationProtocol(protocol.Protoc
    41.4      def op_receive(self, name, _):
    41.5          if self.transport:
    41.6              self.send_reply(["ready", name])
    41.7 -            XendDomain.instance().domain_restore_fd(
    41.8 -                self.transport.sock.fileno())
    41.9 +            try:
   41.10 +                XendDomain.instance().domain_restore_fd(
   41.11 +                    self.transport.sock.fileno())
   41.12 +            except:
   41.13 +                self.send_error()
   41.14 +                self.close()
   41.15          else:
   41.16              log.error(name + ": no transport")
   41.17              raise XendError(name + ": no transport")
    42.1 --- a/tools/python/xen/xm/create.py	Fri Mar 30 17:18:42 2007 -0600
    42.2 +++ b/tools/python/xen/xm/create.py	Tue Apr 03 13:04:51 2007 -0600
    42.3 @@ -186,6 +186,10 @@ gopts.var('cpus', val='CPUS',
    42.4            fn=set_value, default=None,
    42.5            use="CPUS to run the domain on.")
    42.6  
    42.7 +gopts.var('rtc_timeoffset', val='RTC_TIMEOFFSET',
    42.8 +          fn=set_value, default="0",
    42.9 +          use="Set RTC offset.")
   42.10 +
   42.11  gopts.var('pae', val='PAE',
   42.12            fn=set_int, default=1,
   42.13            use="Disable or enable PAE of HVM domain.")
   42.14 @@ -717,7 +721,7 @@ def configure_hvm(config_image, vals):
   42.15      args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb',
   42.16               'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
   42.17               'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
   42.18 -             'sdl', 'display', 'xauthority',
   42.19 +             'sdl', 'display', 'xauthority', 'rtc_timeoffset',
   42.20               'acpi', 'apic', 'usb', 'usbdevice', 'keymap' ]
   42.21      for a in args:
   42.22          if a in vals.__dict__ and vals.__dict__[a] is not None:
    43.1 --- a/tools/python/xen/xm/main.py	Fri Mar 30 17:18:42 2007 -0600
    43.2 +++ b/tools/python/xen/xm/main.py	Tue Apr 03 13:04:51 2007 -0600
    43.3 @@ -929,10 +929,10 @@ def xm_label_list(doms):
    43.4              if security.active_policy not in ['INACTIVE', 'NULL', 'DEFAULT']:
    43.5                  if not d['seclabel']:
    43.6                      d['seclabel'] = 'ERROR'
    43.7 -                elif security.active_policy in ['DEFAULT']:
    43.8 -                    d['seclabel'] = 'DEFAULT'
    43.9 -                else:
   43.10 -                    d['seclabel'] = 'INACTIVE'
   43.11 +            elif security.active_policy in ['DEFAULT']:
   43.12 +                d['seclabel'] = 'DEFAULT'
   43.13 +            else:
   43.14 +                d['seclabel'] = 'INACTIVE'
   43.15  
   43.16              output.append((format % d, d['seclabel']))
   43.17          
    44.1 --- a/tools/python/xen/xm/xenapi_create.py	Fri Mar 30 17:18:42 2007 -0600
    44.2 +++ b/tools/python/xen/xm/xenapi_create.py	Tue Apr 03 13:04:51 2007 -0600
    44.3 @@ -20,7 +20,6 @@
    44.4  
    44.5  from xen.xm.main import server, get_default_SR
    44.6  from xml.dom.minidom import parse, getDOMImplementation
    44.7 -from xml.dom.ext import PrettyPrint
    44.8  from xml.parsers.xmlproc import xmlproc, xmlval, xmldtd
    44.9  from xen.xend import sxp
   44.10  from xen.xend.XendAPIConstants import XEN_API_ON_NORMAL_EXIT, \
    45.1 --- a/tools/xcutils/xc_restore.c	Fri Mar 30 17:18:42 2007 -0600
    45.2 +++ b/tools/xcutils/xc_restore.c	Tue Apr 03 13:04:51 2007 -0600
    45.3 @@ -18,15 +18,14 @@
    45.4  int
    45.5  main(int argc, char **argv)
    45.6  {
    45.7 -    unsigned int xc_fd, io_fd, domid, max_pfn, store_evtchn, console_evtchn;
    45.8 +    unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn;
    45.9      unsigned int hvm, pae, apic;
   45.10      int ret;
   45.11 -    unsigned long store_mfn, console_mfn;
   45.12 +    unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn;
   45.13  
   45.14 -    if (argc != 9)
   45.15 -	errx(1,
   45.16 -	     "usage: %s iofd domid max_pfn store_evtchn console_evtchn hvm pae apic",
   45.17 -	     argv[0]);
   45.18 +    if (argc != 10)
   45.19 +        errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn "
   45.20 +             "console_evtchn hvm pae apic", argv[0]);
   45.21  
   45.22      xc_fd = xc_interface_open();
   45.23      if (xc_fd < 0)
   45.24 @@ -34,19 +33,21 @@ main(int argc, char **argv)
   45.25  
   45.26      io_fd = atoi(argv[1]);
   45.27      domid = atoi(argv[2]);
   45.28 -    max_pfn = atoi(argv[3]);
   45.29 -    store_evtchn = atoi(argv[4]);
   45.30 -    console_evtchn = atoi(argv[5]);
   45.31 -    hvm  = atoi(argv[6]);
   45.32 -    pae  = atoi(argv[7]);
   45.33 -    apic = atoi(argv[8]);
   45.34 +    p2m_size = atoi(argv[3]);
   45.35 +    max_nr_pfns = atoi(argv[4]);
   45.36 +    store_evtchn = atoi(argv[5]);
   45.37 +    console_evtchn = atoi(argv[6]);
   45.38 +    hvm  = atoi(argv[7]);
   45.39 +    pae  = atoi(argv[8]);
   45.40 +    apic = atoi(argv[9]);
   45.41  
   45.42      if (hvm) {
   45.43 -        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
   45.44 +        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, store_evtchn,
   45.45                  &store_mfn, pae, apic);
   45.46 -    } else 
   45.47 -        ret = xc_linux_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
   45.48 -                &store_mfn, console_evtchn, &console_mfn);
   45.49 +    } else
   45.50 +        ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size,
   45.51 +                               max_nr_pfns, store_evtchn, &store_mfn,
   45.52 +                               console_evtchn, &console_mfn);
   45.53  
   45.54      if (ret == 0) {
   45.55  	printf("store-mfn %li\n", store_mfn);
    46.1 --- a/tools/xenstat/xentop/xentop.c	Fri Mar 30 17:18:42 2007 -0600
    46.2 +++ b/tools/xenstat/xentop/xentop.c	Tue Apr 03 13:04:51 2007 -0600
    46.3 @@ -984,6 +984,8 @@ static void top(void)
    46.4  
    46.5  	if(!batch)
    46.6  	do_bottom_line();
    46.7 +
    46.8 +	free(domains);
    46.9  }
   46.10  
   46.11  int main(int argc, char **argv)
    47.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Mar 30 17:18:42 2007 -0600
    47.2 +++ b/xen/arch/x86/hvm/hvm.c	Tue Apr 03 13:04:51 2007 -0600
    47.3 @@ -59,9 +59,6 @@ struct hvm_function_table hvm_funcs __re
    47.4  /* I/O permission bitmap is globally shared by all HVM guests. */
    47.5  char __attribute__ ((__section__ (".bss.page_aligned")))
    47.6      hvm_io_bitmap[3*PAGE_SIZE];
    47.7 -/* MSR permission bitmap is globally shared by all HVM guests. */
    47.8 -char __attribute__ ((__section__ (".bss.page_aligned")))
    47.9 -    hvm_msr_bitmap[PAGE_SIZE];
   47.10  
   47.11  void hvm_enable(struct hvm_function_table *fns)
   47.12  {
   47.13 @@ -75,9 +72,6 @@ void hvm_enable(struct hvm_function_tabl
   47.14      memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
   47.15      clear_bit(0x80, hvm_io_bitmap);
   47.16  
   47.17 -    /* All MSR accesses are intercepted by default. */
   47.18 -    memset(hvm_msr_bitmap, ~0, sizeof(hvm_msr_bitmap));
   47.19 -
   47.20      hvm_funcs   = *fns;
   47.21      hvm_enabled = 1;
   47.22  }
   47.23 @@ -379,6 +373,9 @@ void hvm_send_assist_req(struct vcpu *v)
   47.24  {
   47.25      ioreq_t *p;
   47.26  
   47.27 +    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
   47.28 +        return; /* implicitly bins the i/o operation */
   47.29 +
   47.30      p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
   47.31      if ( unlikely(p->state != STATE_IOREQ_NONE) )
   47.32      {
    48.1 --- a/xen/arch/x86/hvm/intercept.c	Fri Mar 30 17:18:42 2007 -0600
    48.2 +++ b/xen/arch/x86/hvm/intercept.c	Tue Apr 03 13:04:51 2007 -0600
    48.3 @@ -155,28 +155,13 @@ static inline void hvm_mmio_access(struc
    48.4      }
    48.5  }
    48.6  
    48.7 -int hvm_buffered_io_intercept(ioreq_t *p)
    48.8 +int hvm_buffered_io_send(ioreq_t *p)
    48.9  {
   48.10      struct vcpu *v = current;
   48.11      spinlock_t  *buffered_io_lock;
   48.12      buffered_iopage_t *buffered_iopage =
   48.13          (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
   48.14      unsigned long tmp_write_pointer = 0;
   48.15 -    int i;
   48.16 -
   48.17 -    /* ignore READ ioreq_t! */
   48.18 -    if ( p->dir == IOREQ_READ )
   48.19 -        return 0;
   48.20 -
   48.21 -    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
   48.22 -        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
   48.23 -             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
   48.24 -                                     hvm_buffered_io_ranges[i]->length )
   48.25 -            break;
   48.26 -    }
   48.27 -
   48.28 -    if ( i == HVM_BUFFERED_IO_RANGE_NR )
   48.29 -        return 0;
   48.30  
   48.31      buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
   48.32      spin_lock(buffered_io_lock);
   48.33 @@ -205,6 +190,27 @@ int hvm_buffered_io_intercept(ioreq_t *p
   48.34      return 1;
   48.35  }
   48.36  
   48.37 +int hvm_buffered_io_intercept(ioreq_t *p)
   48.38 +{
   48.39 +    int i;
   48.40 +
   48.41 +    /* ignore READ ioreq_t! */
   48.42 +    if ( p->dir == IOREQ_READ )
   48.43 +        return 0;
   48.44 +
   48.45 +    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
   48.46 +        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
   48.47 +             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
   48.48 +                                     hvm_buffered_io_ranges[i]->length )
   48.49 +            break;
   48.50 +    }
   48.51 +
   48.52 +    if ( i == HVM_BUFFERED_IO_RANGE_NR )
   48.53 +        return 0;
   48.54 +
   48.55 +    return hvm_buffered_io_send(p);
   48.56 +}
   48.57 +
   48.58  int hvm_mmio_intercept(ioreq_t *p)
   48.59  {
   48.60      struct vcpu *v = current;
    49.1 --- a/xen/arch/x86/hvm/io.c	Fri Mar 30 17:18:42 2007 -0600
    49.2 +++ b/xen/arch/x86/hvm/io.c	Tue Apr 03 13:04:51 2007 -0600
    49.3 @@ -771,10 +771,11 @@ void hvm_io_assist(struct vcpu *v)
    49.4      struct cpu_user_regs *regs;
    49.5      struct hvm_io_op *io_opp;
    49.6      unsigned long gmfn;
    49.7 +    struct domain *d = v->domain;
    49.8  
    49.9      io_opp = &v->arch.hvm_vcpu.io_op;
   49.10      regs   = &io_opp->io_context;
   49.11 -    vio    = get_vio(v->domain, v->vcpu_id);
   49.12 +    vio    = get_vio(d, v->vcpu_id);
   49.13  
   49.14      p = &vio->vp_ioreq;
   49.15      if ( p->state != STATE_IORESP_READY )
   49.16 @@ -797,11 +798,13 @@ void hvm_io_assist(struct vcpu *v)
   49.17      memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
   49.18  
   49.19      /* Has memory been dirtied? */
   49.20 -    if ( p->dir == IOREQ_READ && p->data_is_ptr )
   49.21 +    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
   49.22      {
   49.23          gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
   49.24 -        mark_dirty(v->domain, gmfn);
   49.25 +        mark_dirty(d, gmfn);
   49.26      }
   49.27 +
   49.28 +    vcpu_end_shutdown_deferral(v);
   49.29  }
   49.30  
   49.31  /*
    50.1 --- a/xen/arch/x86/hvm/platform.c	Fri Mar 30 17:18:42 2007 -0600
    50.2 +++ b/xen/arch/x86/hvm/platform.c	Tue Apr 03 13:04:51 2007 -0600
    50.3 @@ -921,6 +921,26 @@ static void send_mmio_req(unsigned char 
    50.4      hvm_send_assist_req(v);
    50.5  }
    50.6  
    50.7 +void send_timeoffset_req(unsigned long timeoff)
    50.8 +{
    50.9 +    ioreq_t p[1];
   50.10 +
   50.11 +    if ( timeoff == 0 )
   50.12 +        return;
   50.13 +
   50.14 +    memset(p, 0, sizeof(*p));
   50.15 +
   50.16 +    p->type = IOREQ_TYPE_TIMEOFFSET;
   50.17 +    p->size = 4;
   50.18 +    p->dir = IOREQ_WRITE;
   50.19 +    p->data = timeoff;
   50.20 +
   50.21 +    p->state = STATE_IOREQ_READY;
   50.22 +
   50.23 +    if ( !hvm_buffered_io_send(p) )
   50.24 +        printk("Unsuccessful timeoffset update\n");
   50.25 +}
   50.26 +
   50.27  static void mmio_operands(int type, unsigned long gpa,
   50.28                            struct hvm_io_op *mmio_op,
   50.29                            unsigned char op_size)
    51.1 --- a/xen/arch/x86/hvm/rtc.c	Fri Mar 30 17:18:42 2007 -0600
    51.2 +++ b/xen/arch/x86/hvm/rtc.c	Tue Apr 03 13:04:51 2007 -0600
    51.3 @@ -157,6 +157,10 @@ static inline int from_bcd(RTCState *s, 
    51.4  static void rtc_set_time(RTCState *s)
    51.5  {
    51.6      struct tm *tm = &s->current_tm;
    51.7 +    unsigned long before, after; /* XXX s_time_t */
    51.8 +      
    51.9 +    before = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
   51.10 +		    tm->tm_hour, tm->tm_min, tm->tm_sec);
   51.11      
   51.12      tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
   51.13      tm->tm_min = from_bcd(s, s->hw.cmos_data[RTC_MINUTES]);
   51.14 @@ -168,6 +172,10 @@ static void rtc_set_time(RTCState *s)
   51.15      tm->tm_mday = from_bcd(s, s->hw.cmos_data[RTC_DAY_OF_MONTH]);
   51.16      tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
   51.17      tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
   51.18 +
   51.19 +    after = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
   51.20 +                   tm->tm_hour, tm->tm_min, tm->tm_sec);
   51.21 +    send_timeoffset_req(after - before);
   51.22  }
   51.23  
   51.24  static void rtc_copy_date(RTCState *s)
    52.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Fri Mar 30 17:18:42 2007 -0600
    52.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Tue Apr 03 13:04:51 2007 -0600
    52.3 @@ -79,6 +79,30 @@ struct host_save_area *alloc_host_save_a
    52.4      return hsa;
    52.5  }
    52.6  
    52.7 +static void disable_intercept_for_msr(char *msr_bitmap, u32 msr)
    52.8 +{
    52.9 +    /*
   52.10 +     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
   52.11 +     */
   52.12 +    if ( msr <= 0x1fff )
   52.13 +    {
   52.14 +        __clear_bit(msr*2, msr_bitmap + 0x000); 
   52.15 +        __clear_bit(msr*2+1, msr_bitmap + 0x000); 
   52.16 +    }
   52.17 +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
   52.18 +    {
   52.19 +        msr &= 0x1fff;
   52.20 +        __clear_bit(msr*2, msr_bitmap + 0x800);
   52.21 +        __clear_bit(msr*2+1, msr_bitmap + 0x800);
   52.22 +    } 
    52.23 +    else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
   52.24 +    {
   52.25 +        msr &= 0x1fff;
   52.26 +        __clear_bit(msr*2, msr_bitmap + 0x1000);
   52.27 +        __clear_bit(msr*2+1, msr_bitmap + 0x1000);
   52.28 +    }
   52.29 +}
   52.30 +
   52.31  static int construct_vmcb(struct vcpu *v)
   52.32  {
   52.33      struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
   52.34 @@ -114,6 +138,10 @@ static int construct_vmcb(struct vcpu *v
   52.35      if ( arch_svm->msrpm == NULL )
   52.36          return -ENOMEM;
   52.37      memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
   52.38 +
   52.39 +    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_FS_BASE);
   52.40 +    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_GS_BASE);
   52.41 +
   52.42      vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
   52.43      vmcb->iopm_base_pa  = (u64)virt_to_maddr(hvm_io_bitmap);
   52.44  
    53.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Fri Mar 30 17:18:42 2007 -0600
    53.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue Apr 03 13:04:51 2007 -0600
    53.3 @@ -289,7 +289,7 @@ static void construct_vmcs(struct vcpu *
    53.4      v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
    53.5  
    53.6      if ( cpu_has_vmx_msr_bitmap )
    53.7 -        __vmwrite(MSR_BITMAP, virt_to_maddr(hvm_msr_bitmap));
    53.8 +        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
    53.9  
   53.10      /* I/O access bitmap. */
   53.11      __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
    54.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Mar 30 17:18:42 2007 -0600
    54.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Apr 03 13:04:51 2007 -0600
    54.3 @@ -51,6 +51,8 @@
    54.4  #include <public/hvm/save.h>
    54.5  #include <asm/hvm/trace.h>
    54.6  
    54.7 +char *vmx_msr_bitmap;
    54.8 +
    54.9  static void vmx_ctxt_switch_from(struct vcpu *v);
   54.10  static void vmx_ctxt_switch_to(struct vcpu *v);
   54.11  
   54.12 @@ -1005,14 +1007,14 @@ static void disable_intercept_for_msr(u3
   54.13       */
   54.14      if ( msr <= 0x1fff )
   54.15      {
   54.16 -        __clear_bit(msr, hvm_msr_bitmap + 0x000); /* read-low */
   54.17 -        __clear_bit(msr, hvm_msr_bitmap + 0x800); /* write-low */
   54.18 +        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
   54.19 +        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
   54.20      }
   54.21      else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
   54.22      {
   54.23          msr &= 0x1fff;
   54.24 -        __clear_bit(msr, hvm_msr_bitmap + 0x400); /* read-high */
   54.25 -        __clear_bit(msr, hvm_msr_bitmap + 0xc00); /* write-high */
   54.26 +        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
   54.27 +        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
   54.28      }
   54.29  }
   54.30  
   54.31 @@ -1105,6 +1107,9 @@ int start_vmx(void)
   54.32      if ( cpu_has_vmx_msr_bitmap )
   54.33      {
   54.34          printk("VMX: MSR intercept bitmap enabled\n");
   54.35 +        vmx_msr_bitmap = alloc_xenheap_page();
   54.36 +        BUG_ON(vmx_msr_bitmap == NULL);
   54.37 +        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
   54.38          disable_intercept_for_msr(MSR_FS_BASE);
   54.39          disable_intercept_for_msr(MSR_GS_BASE);
   54.40      }
    55.1 --- a/xen/arch/x86/mm.c	Fri Mar 30 17:18:42 2007 -0600
    55.2 +++ b/xen/arch/x86/mm.c	Tue Apr 03 13:04:51 2007 -0600
    55.3 @@ -806,7 +806,8 @@ void put_page_from_l1e(l1_pgentry_t l1e,
    55.4       * (Note that the undestroyable active grants are not a security hole in
    55.5       * Xen. All active grants can safely be cleaned up when the domain dies.)
    55.6       */
    55.7 -    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && !d->is_shutdown && !d->is_dying )
    55.8 +    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
    55.9 +         !d->is_shutting_down && !d->is_dying )
   55.10      {
   55.11          MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
   55.12                  l1e_get_intpte(l1e));
    56.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Mar 30 17:18:42 2007 -0600
    56.2 +++ b/xen/arch/x86/mm/hap/hap.c	Tue Apr 03 13:04:51 2007 -0600
    56.3 @@ -52,7 +52,7 @@
    56.4  /************************************************/
    56.5  /*             HAP SUPPORT FUNCTIONS            */
    56.6  /************************************************/
    56.7 -mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
    56.8 +mfn_t hap_alloc(struct domain *d)
    56.9  {
   56.10      struct page_info *sp = NULL;
   56.11      void *p;
   56.12 @@ -82,43 +82,43 @@ void hap_free(struct domain *d, mfn_t sm
   56.13      list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
   56.14  }
   56.15  
   56.16 -static int hap_alloc_p2m_pages(struct domain *d)
   56.17 -{
   56.18 -    struct page_info *pg;
   56.19 -
   56.20 -    ASSERT(hap_locked_by_me(d));
   56.21 -
   56.22 -    pg = mfn_to_page(hap_alloc(d, 0));
   56.23 -    d->arch.paging.hap.p2m_pages += 1;
   56.24 -    d->arch.paging.hap.total_pages -= 1;
   56.25 -    
   56.26 -    page_set_owner(pg, d);
   56.27 -    pg->count_info = 1;
   56.28 -    list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
   56.29 -
   56.30 -    return 1;
   56.31 -}
   56.32 -
   56.33  struct page_info * hap_alloc_p2m_page(struct domain *d)
   56.34  {
   56.35 -    struct list_head *entry;
   56.36      struct page_info *pg;
   56.37      mfn_t mfn;
   56.38      void *p;
   56.39  
   56.40      hap_lock(d);
   56.41 -    
   56.42 -    if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 
   56.43 -         !hap_alloc_p2m_pages(d) ) {
   56.44 -        hap_unlock(d);
   56.45 -        return NULL;
   56.46 +
   56.47 +#if CONFIG_PAGING_LEVELS == 3
   56.48 +    /* Under PAE mode, top-level P2M table should be allocated below 4GB space
   56.49 +     * because the size of h_cr3 is only 32-bit. We use alloc_domheap_pages to 
   56.50 +     * force this requirement. This page will be de-allocated in 
   56.51 +     * hap_free_p2m_page(), like other P2M pages.
   56.52 +    */
   56.53 +    if ( d->arch.paging.hap.p2m_pages == 0 ) 
   56.54 +    {
   56.55 +	pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
   56.56 +	d->arch.paging.hap.p2m_pages += 1;
   56.57      }
   56.58 -    entry = d->arch.paging.hap.p2m_freelist.next;
   56.59 -    list_del(entry);
   56.60 -    
   56.61 +    else
   56.62 +#endif
   56.63 +    {
   56.64 +	pg = mfn_to_page(hap_alloc(d));
   56.65 +	
   56.66 +	d->arch.paging.hap.p2m_pages += 1;
   56.67 +	d->arch.paging.hap.total_pages -= 1;
   56.68 +    }	
   56.69 +
   56.70 +    if ( pg == NULL ) {
   56.71 +	hap_unlock(d);
   56.72 +	return NULL;
   56.73 +    }   
   56.74 +
   56.75      hap_unlock(d);
   56.76  
   56.77 -    pg = list_entry(entry, struct page_info, list);
   56.78 +    page_set_owner(pg, d);
   56.79 +    pg->count_info = 1;
   56.80      mfn = page_to_mfn(pg);
   56.81      p = hap_map_domain_page(mfn);
   56.82      clear_page(p);
   56.83 @@ -141,6 +141,7 @@ void hap_free_p2m_page(struct domain *d,
   56.84      page_set_owner(pg, NULL); 
   56.85      free_domheap_pages(pg, 0);
   56.86      d->arch.paging.hap.p2m_pages--;
   56.87 +    ASSERT( d->arch.paging.hap.p2m_pages >= 0 );
   56.88  }
   56.89  
   56.90  /* Return the size of the pool, rounded up to the nearest MB */
   56.91 @@ -320,7 +321,7 @@ mfn_t hap_make_monitor_table(struct vcpu
   56.92  #if CONFIG_PAGING_LEVELS == 4
   56.93      {
   56.94          mfn_t m4mfn;
   56.95 -        m4mfn = hap_alloc(d, 0);
   56.96 +        m4mfn = hap_alloc(d);
   56.97          hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
   56.98          return m4mfn;
   56.99      }
  56.100 @@ -331,12 +332,12 @@ mfn_t hap_make_monitor_table(struct vcpu
  56.101          l2_pgentry_t *l2e;
  56.102          int i;
  56.103  
  56.104 -        m3mfn = hap_alloc(d, 0);
  56.105 +        m3mfn = hap_alloc(d);
  56.106  
  56.107          /* Install a monitor l2 table in slot 3 of the l3 table.
  56.108           * This is used for all Xen entries, including linear maps
  56.109           */
  56.110 -        m2mfn = hap_alloc(d, 0);
  56.111 +        m2mfn = hap_alloc(d);
  56.112          l3e = hap_map_domain_page(m3mfn);
  56.113          l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
  56.114          hap_install_xen_entries_in_l2h(v, m2mfn);
  56.115 @@ -357,7 +358,7 @@ mfn_t hap_make_monitor_table(struct vcpu
  56.116      {
  56.117          mfn_t m2mfn;
  56.118          
  56.119 -        m2mfn = hap_alloc(d, 0);
  56.120 +        m2mfn = hap_alloc(d);
  56.121          hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
  56.122      
  56.123          return m2mfn;
  56.124 @@ -390,7 +391,6 @@ void hap_domain_init(struct domain *d)
  56.125  {
  56.126      hap_lock_init(d);
  56.127      INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
  56.128 -    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
  56.129  }
  56.130  
  56.131  /* return 0 for success, -errno for failure */
    57.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Mar 30 17:18:42 2007 -0600
    57.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Tue Apr 03 13:04:51 2007 -0600
    57.3 @@ -2823,8 +2823,8 @@ static int sh_page_fault(struct vcpu *v,
    57.4           * are OK, this can only have been caused by a failed
    57.5           * shadow_set_l*e(), which will have crashed the guest.
    57.6           * Get out of the fault handler immediately. */
    57.7 -        ASSERT(d->is_shutdown);
    57.8 -        unmap_walk(v, &gw); 
    57.9 +        ASSERT(d->is_shutting_down);
   57.10 +        unmap_walk(v, &gw);
   57.11          shadow_unlock(d);
   57.12          return 0;
   57.13      }
    58.1 --- a/xen/arch/x86/setup.c	Fri Mar 30 17:18:42 2007 -0600
    58.2 +++ b/xen/arch/x86/setup.c	Tue Apr 03 13:04:51 2007 -0600
    58.3 @@ -591,8 +591,6 @@ void __init __start_xen(multiboot_info_t
    58.4  
    58.5      numa_initmem_init(0, max_page);
    58.6  
    58.7 -    end_boot_allocator();
    58.8 -
    58.9      /* Initialise the Xen heap, skipping RAM holes. */
   58.10      nr_pages = 0;
   58.11      for ( i = 0; i < e820.nr_map; i++ )
   58.12 @@ -618,6 +616,8 @@ void __init __start_xen(multiboot_info_t
   58.13             nr_pages >> (20 - PAGE_SHIFT),
   58.14             nr_pages << (PAGE_SHIFT - 10));
   58.15  
   58.16 +    end_boot_allocator();
   58.17 +
   58.18      early_boot = 0;
   58.19  
   58.20      early_cpu_init();
    59.1 --- a/xen/arch/x86/time.c	Fri Mar 30 17:18:42 2007 -0600
    59.2 +++ b/xen/arch/x86/time.c	Tue Apr 03 13:04:51 2007 -0600
    59.3 @@ -573,7 +573,7 @@ static void init_platform_timer(void)
    59.4   * machines were long is 32-bit! (However, as time_t is signed, we
    59.5   * will already get problems at other places on 2038-01-19 03:14:08)
    59.6   */
    59.7 -static inline unsigned long
    59.8 +unsigned long
    59.9  mktime (unsigned int year, unsigned int mon,
   59.10          unsigned int day, unsigned int hour,
   59.11          unsigned int min, unsigned int sec)
    60.1 --- a/xen/arch/x86/traps.c	Fri Mar 30 17:18:42 2007 -0600
    60.2 +++ b/xen/arch/x86/traps.c	Tue Apr 03 13:04:51 2007 -0600
    60.3 @@ -285,23 +285,32 @@ void show_xen_trace()
    60.4      show_trace(&regs);
    60.5  }
    60.6  
    60.7 -void show_stack_overflow(unsigned long esp)
    60.8 +void show_stack_overflow(unsigned int cpu, unsigned long esp)
    60.9  {
   60.10  #ifdef MEMORY_GUARD
   60.11 -    unsigned long esp_top;
   60.12 +    unsigned long esp_top, esp_bottom;
   60.13      unsigned long *stack, addr;
   60.14  
   60.15 -    esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
   60.16 +    esp_bottom = (esp | (STACK_SIZE - 1)) + 1;
   60.17 +    esp_top    = esp_bottom - DEBUG_STACK_SIZE;
   60.18 +
   60.19 +    printk("Valid stack range: %p-%p, sp=%p, tss.esp0=%p\n",
   60.20 +           (void *)esp_top, (void *)esp_bottom, (void *)esp,
   60.21 +           (void *)init_tss[cpu].esp0);
   60.22  
   60.23      /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
   60.24      if ( ((unsigned long)(esp - esp_top) > 512) &&
   60.25           ((unsigned long)(esp_top - esp) > 512) )
   60.26 +    {
   60.27 +        printk("No stack overflow detected. Skipping stack trace.\n");
   60.28          return;
   60.29 +    }
   60.30  
   60.31      if ( esp < esp_top )
   60.32          esp = esp_top;
   60.33  
   60.34 -    printk("Xen stack overflow:\n   ");
   60.35 +    printk("Xen stack overflow (dumping trace %p-%p):\n   ",
   60.36 +           (void *)esp, (void *)esp_bottom);
   60.37  
   60.38      stack = (unsigned long *)esp;
   60.39      while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
    61.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Mar 30 17:18:42 2007 -0600
    61.2 +++ b/xen/arch/x86/x86_32/traps.c	Tue Apr 03 13:04:51 2007 -0600
    61.3 @@ -139,7 +139,7 @@ void show_page_walk(unsigned long addr)
    61.4      unmap_domain_page(l1t);
    61.5  }
    61.6  
    61.7 -#define DOUBLEFAULT_STACK_SIZE 1024
    61.8 +#define DOUBLEFAULT_STACK_SIZE 2048
    61.9  static struct tss_struct doublefault_tss;
   61.10  static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
   61.11  
   61.12 @@ -167,7 +167,7 @@ asmlinkage void do_double_fault(void)
   61.13             tss->esi, tss->edi, tss->ebp, tss->esp);
   61.14      printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
   61.15             tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
   61.16 -    show_stack_overflow(tss->esp);
   61.17 +    show_stack_overflow(cpu, tss->esp);
   61.18  
   61.19      panic("DOUBLE FAULT -- system shutdown\n");
   61.20  }
   61.21 @@ -268,8 +268,7 @@ void __init percpu_traps_init(void)
   61.22      tss->ds     = __HYPERVISOR_DS;
   61.23      tss->es     = __HYPERVISOR_DS;
   61.24      tss->ss     = __HYPERVISOR_DS;
   61.25 -    tss->esp    = (unsigned long)
   61.26 -        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
   61.27 +    tss->esp    = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
   61.28      tss->__cr3  = __pa(idle_pg_table);
   61.29      tss->cs     = __HYPERVISOR_CS;
   61.30      tss->eip    = (unsigned long)do_double_fault;
    62.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Mar 30 17:18:42 2007 -0600
    62.2 +++ b/xen/arch/x86/x86_64/traps.c	Tue Apr 03 13:04:51 2007 -0600
    62.3 @@ -171,7 +171,7 @@ asmlinkage void do_double_fault(struct c
    62.4      printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
    62.5             regs->r12, regs->r13, regs->r14);
    62.6      printk("r15: %016lx\n", regs->r15);
    62.7 -    show_stack_overflow(regs->rsp);
    62.8 +    show_stack_overflow(cpu, regs->rsp);
    62.9  
   62.10      panic("DOUBLE FAULT -- system shutdown\n");
   62.11  }
   62.12 @@ -270,18 +270,18 @@ void __init percpu_traps_init(void)
   62.13      stack_bottom = (char *)get_stack_bottom();
   62.14      stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
   62.15  
   62.16 -    /* Double-fault handler has its own per-CPU 1kB stack. */
   62.17 -    init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
   62.18 +    /* Double-fault handler has its own per-CPU 2kB stack. */
   62.19 +    init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
   62.20  
   62.21      /* NMI handler has its own per-CPU 1kB stack. */
   62.22 -    init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
   62.23 +    init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
   62.24  
   62.25      /*
   62.26       * Trampoline for SYSCALL entry from long mode.
   62.27       */
   62.28  
   62.29      /* Skip the NMI and DF stacks. */
   62.30 -    stack = &stack[2048];
   62.31 +    stack = &stack[3072];
   62.32      wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
   62.33  
   62.34      /* movq %rsp, saversp(%rip) */
    63.1 --- a/xen/common/domain.c	Fri Mar 30 17:18:42 2007 -0600
    63.2 +++ b/xen/common/domain.c	Tue Apr 03 13:04:51 2007 -0600
    63.3 @@ -59,6 +59,7 @@ struct domain *alloc_domain(domid_t domi
    63.4      atomic_set(&d->refcnt, 1);
    63.5      spin_lock_init(&d->big_lock);
    63.6      spin_lock_init(&d->page_alloc_lock);
    63.7 +    spin_lock_init(&d->shutdown_lock);
    63.8      INIT_LIST_HEAD(&d->page_list);
    63.9      INIT_LIST_HEAD(&d->xenpage_list);
   63.10  
   63.11 @@ -83,6 +84,45 @@ void free_domain(struct domain *d)
   63.12      xfree(d);
   63.13  }
   63.14  
   63.15 +static void __domain_finalise_shutdown(struct domain *d)
   63.16 +{
   63.17 +    struct vcpu *v;
   63.18 +
   63.19 +    BUG_ON(!spin_is_locked(&d->shutdown_lock));
   63.20 +
   63.21 +    if ( d->is_shut_down )
   63.22 +        return;
   63.23 +
   63.24 +    for_each_vcpu ( d, v )
   63.25 +        if ( !v->paused_for_shutdown )
   63.26 +            return;
   63.27 +
   63.28 +    d->is_shut_down = 1;
   63.29 +
   63.30 +    for_each_vcpu ( d, v )
   63.31 +        vcpu_sleep_nosync(v);
   63.32 +
   63.33 +    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
   63.34 +}
   63.35 +
   63.36 +static void vcpu_check_shutdown(struct vcpu *v)
   63.37 +{
   63.38 +    struct domain *d = v->domain;
   63.39 +
   63.40 +    spin_lock(&d->shutdown_lock);
   63.41 +
   63.42 +    if ( d->is_shutting_down )
   63.43 +    {
   63.44 +        if ( !v->paused_for_shutdown )
   63.45 +            atomic_inc(&v->pause_count);
   63.46 +        v->paused_for_shutdown = 1;
   63.47 +        v->defer_shutdown = 0;
   63.48 +        __domain_finalise_shutdown(d);
   63.49 +    }
   63.50 +
   63.51 +    spin_unlock(&d->shutdown_lock);
   63.52 +}
   63.53 +
   63.54  struct vcpu *alloc_vcpu(
   63.55      struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
   63.56  {
   63.57 @@ -122,6 +162,9 @@ struct vcpu *alloc_vcpu(
   63.58      if ( vcpu_id != 0 )
   63.59          d->vcpu[v->vcpu_id-1]->next_in_list = v;
   63.60  
   63.61 +    /* Must be called after making new vcpu visible to for_each_vcpu(). */
   63.62 +    vcpu_check_shutdown(v);
   63.63 +
   63.64      return v;
   63.65  }
   63.66  
   63.67 @@ -286,7 +329,7 @@ void domain_kill(struct domain *d)
   63.68  
   63.69  void __domain_crash(struct domain *d)
   63.70  {
   63.71 -    if ( d->is_shutdown )
   63.72 +    if ( d->is_shutting_down )
   63.73      {
   63.74          /* Print nothing: the domain is already shutting down. */
   63.75      }
   63.76 @@ -335,16 +378,73 @@ void domain_shutdown(struct domain *d, u
   63.77      if ( d->domain_id == 0 )
   63.78          dom0_shutdown(reason);
   63.79  
   63.80 -    atomic_inc(&d->pause_count);
   63.81 -    if ( !xchg(&d->is_shutdown, 1) )
   63.82 -        d->shutdown_code = reason;
   63.83 -    else
   63.84 -        domain_unpause(d);
   63.85 +    spin_lock(&d->shutdown_lock);
   63.86 +
   63.87 +    if ( d->is_shutting_down )
   63.88 +    {
   63.89 +        spin_unlock(&d->shutdown_lock);
   63.90 +        return;
   63.91 +    }
   63.92 +
   63.93 +    d->is_shutting_down = 1;
   63.94 +    d->shutdown_code = reason;
   63.95 +
   63.96 +    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
   63.97  
   63.98      for_each_vcpu ( d, v )
   63.99 -        vcpu_sleep_nosync(v);
  63.100 +    {
  63.101 +        if ( v->defer_shutdown )
  63.102 +            continue;
  63.103 +        atomic_inc(&v->pause_count);
  63.104 +        v->paused_for_shutdown = 1;
  63.105 +    }
  63.106 +
  63.107 +    __domain_finalise_shutdown(d);
  63.108 +
  63.109 +    spin_unlock(&d->shutdown_lock);
  63.110 +}
  63.111 +
  63.112 +void domain_resume(struct domain *d)
  63.113 +{
  63.114 +    struct vcpu *v;
  63.115 +
  63.116 +    /*
  63.117 +     * Some code paths assume that shutdown status does not get reset under
  63.118 +     * their feet (e.g., some assertions make this assumption).
  63.119 +     */
  63.120 +    domain_pause(d);
  63.121 +
  63.122 +    spin_lock(&d->shutdown_lock);
  63.123 +
  63.124 +    d->is_shutting_down = d->is_shut_down = 0;
  63.125  
  63.126 -    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
  63.127 +    for_each_vcpu ( d, v )
  63.128 +    {
  63.129 +        if ( v->paused_for_shutdown )
  63.130 +            vcpu_unpause(v);
  63.131 +        v->paused_for_shutdown = 0;
  63.132 +    }
  63.133 +
  63.134 +    spin_unlock(&d->shutdown_lock);
  63.135 +
  63.136 +    domain_unpause(d);
  63.137 +}
  63.138 +
  63.139 +int vcpu_start_shutdown_deferral(struct vcpu *v)
  63.140 +{
  63.141 +    v->defer_shutdown = 1;
  63.142 +    smp_mb(); /* set deferral status /then/ check for shutdown */
  63.143 +    if ( unlikely(v->domain->is_shutting_down) )
  63.144 +        vcpu_check_shutdown(v);
  63.145 +    return v->defer_shutdown;
  63.146 +}
  63.147 +
  63.148 +void vcpu_end_shutdown_deferral(struct vcpu *v)
  63.149 +{
  63.150 +    v->defer_shutdown = 0;
  63.151 +    smp_mb(); /* clear deferral status /then/ check for shutdown */
  63.152 +    if ( unlikely(v->domain->is_shutting_down) )
  63.153 +        vcpu_check_shutdown(v);
  63.154  }
  63.155  
  63.156  void domain_pause_for_debugger(void)
  63.157 @@ -425,7 +525,6 @@ void vcpu_pause_nosync(struct vcpu *v)
  63.158  
  63.159  void vcpu_unpause(struct vcpu *v)
  63.160  {
  63.161 -    ASSERT(v != current);
  63.162      if ( atomic_dec_and_test(&v->pause_count) )
  63.163          vcpu_wake(v);
  63.164  }
  63.165 @@ -446,8 +545,6 @@ void domain_unpause(struct domain *d)
  63.166  {
  63.167      struct vcpu *v;
  63.168  
  63.169 -    ASSERT(d != current->domain);
  63.170 -
  63.171      if ( atomic_dec_and_test(&d->pause_count) )
  63.172          for_each_vcpu( d, v )
  63.173              vcpu_wake(v);
    64.1 --- a/xen/common/domctl.c	Fri Mar 30 17:18:42 2007 -0600
    64.2 +++ b/xen/common/domctl.c	Tue Apr 03 13:04:51 2007 -0600
    64.3 @@ -115,7 +115,7 @@ void getdomaininfo(struct domain *d, str
    64.4  
    64.5      info->flags = flags |
    64.6          (d->is_dying                ? XEN_DOMINF_dying    : 0) |
    64.7 -        (d->is_shutdown             ? XEN_DOMINF_shutdown : 0) |
    64.8 +        (d->is_shut_down            ? XEN_DOMINF_shutdown : 0) |
    64.9          (d->is_paused_by_controller ? XEN_DOMINF_paused   : 0) |
   64.10          d->shutdown_code << XEN_DOMINF_shutdownshift;
   64.11  
   64.12 @@ -287,8 +287,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   64.13          if ( d == NULL )
   64.14              break;
   64.15  
   64.16 -        if ( xchg(&d->is_shutdown, 0) )
   64.17 -            domain_unpause(d);
   64.18 +        domain_resume(d);
   64.19          rcu_unlock_domain(d);
   64.20          ret = 0;
   64.21      }
    65.1 --- a/xen/common/page_alloc.c	Fri Mar 30 17:18:42 2007 -0600
    65.2 +++ b/xen/common/page_alloc.c	Tue Apr 03 13:04:51 2007 -0600
    65.3 @@ -512,6 +512,14 @@ void init_heap_pages(
    65.4  
    65.5      ASSERT(zone < NR_ZONES);
    65.6  
    65.7 +    if ( unlikely(avail[0] == NULL) )
    65.8 +    {
    65.9 +        /* Start-of-day memory node 0 initialisation. */
   65.10 +        init_heap_block(&_heap0);
   65.11 +        _heap[0] = &_heap0;
   65.12 +        avail[0] = avail0;
   65.13 +    }
   65.14 +
   65.15      if ( likely(page_to_mfn(pg) != 0) )
   65.16          nid_prev = phys_to_nid(page_to_maddr(pg-1));
   65.17      else
   65.18 @@ -570,10 +578,6 @@ void end_boot_allocator(void)
   65.19      unsigned long i;
   65.20      int curr_free, next_free;
   65.21  
   65.22 -    init_heap_block(&_heap0);
   65.23 -    _heap[0] = &_heap0;
   65.24 -    avail[0] = avail0;
   65.25 -
   65.26      /* Pages that are free now go to the domain sub-allocator. */
   65.27      if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
   65.28          map_alloc(first_valid_mfn, 1);
    66.1 --- a/xen/common/symbols.c	Fri Mar 30 17:18:42 2007 -0600
    66.2 +++ b/xen/common/symbols.c	Tue Apr 03 13:04:51 2007 -0600
    66.3 @@ -16,6 +16,7 @@
    66.4  #include <xen/init.h>
    66.5  #include <xen/lib.h>
    66.6  #include <xen/string.h>
    66.7 +#include <xen/spinlock.h>
    66.8  
    66.9  extern unsigned long symbols_addresses[];
   66.10  extern unsigned long symbols_num_syms;
   66.11 @@ -140,12 +141,15 @@ const char *symbols_lookup(unsigned long
   66.12  void __print_symbol(const char *fmt, unsigned long address)
   66.13  {
   66.14      const char *name;
   66.15 -    unsigned long offset, size;
   66.16 -    char namebuf[KSYM_NAME_LEN+1];
   66.17 +    unsigned long offset, size, flags;
   66.18  
   66.19 +    static DEFINE_SPINLOCK(lock);
   66.20 +    static char namebuf[KSYM_NAME_LEN+1];
   66.21  #define BUFFER_SIZE sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \
   66.22  			2*(BITS_PER_LONG*3/10) + 1
   66.23 -    char buffer[BUFFER_SIZE];
   66.24 +    static char buffer[BUFFER_SIZE];
   66.25 +
   66.26 +    spin_lock_irqsave(&lock, flags);
   66.27  
   66.28      name = symbols_lookup(address, &size, &offset, namebuf);
   66.29  
   66.30 @@ -155,4 +159,6 @@ void __print_symbol(const char *fmt, uns
   66.31          snprintf(buffer, BUFFER_SIZE, "%s+%#lx/%#lx", name, offset, size);
   66.32  
   66.33      printk(fmt, buffer);
   66.34 +
   66.35 +    spin_unlock_irqrestore(&lock, flags);
   66.36  }
    67.1 --- a/xen/drivers/char/console.c	Fri Mar 30 17:18:42 2007 -0600
    67.2 +++ b/xen/drivers/char/console.c	Tue Apr 03 13:04:51 2007 -0600
    67.3 @@ -858,19 +858,20 @@ static int __init debugtrace_init(void)
    67.4  void panic(const char *fmt, ...)
    67.5  {
    67.6      va_list args;
    67.7 -    char buf[128];
    67.8      unsigned long flags;
    67.9      static DEFINE_SPINLOCK(lock);
   67.10 +    static char buf[128];
   67.11      
   67.12      debugtrace_dump();
   67.13  
   67.14 +    /* Protects buf[] and ensure multi-line message prints atomically. */
   67.15 +    spin_lock_irqsave(&lock, flags);
   67.16 +
   67.17      va_start(args, fmt);
   67.18      (void)vsnprintf(buf, sizeof(buf), fmt, args);
   67.19      va_end(args);
   67.20  
   67.21 -    /* Spit out multiline message in one go. */
   67.22      console_start_sync();
   67.23 -    spin_lock_irqsave(&lock, flags);
   67.24      printk("\n****************************************\n");
   67.25      printk("Panic on CPU %d:\n", smp_processor_id());
   67.26      printk(buf);
   67.27 @@ -879,6 +880,7 @@ void panic(const char *fmt, ...)
   67.28          printk("Manual reset required ('noreboot' specified)\n");
   67.29      else
   67.30          printk("Reboot in five seconds...\n");
   67.31 +
   67.32      spin_unlock_irqrestore(&lock, flags);
   67.33  
   67.34      debugger_trap_immediate();
    68.1 --- a/xen/include/asm-x86/domain.h	Fri Mar 30 17:18:42 2007 -0600
    68.2 +++ b/xen/include/asm-x86/domain.h	Tue Apr 03 13:04:51 2007 -0600
    68.3 @@ -115,7 +115,6 @@ struct hap_domain {
    68.4      const char       *locker_function;
    68.5      
    68.6      struct list_head  freelists;
    68.7 -    struct list_head  p2m_freelist;
    68.8      unsigned int      total_pages;  /* number of pages allocated */
    68.9      unsigned int      free_pages;   /* number of pages on freelists */
   68.10      unsigned int      p2m_pages;    /* number of pages allocates to p2m */
    69.1 --- a/xen/include/asm-x86/hvm/io.h	Fri Mar 30 17:18:42 2007 -0600
    69.2 +++ b/xen/include/asm-x86/hvm/io.h	Tue Apr 03 13:04:51 2007 -0600
    69.3 @@ -127,6 +127,7 @@ static inline int hvm_portio_intercept(i
    69.4  }
    69.5  
    69.6  extern int hvm_mmio_intercept(ioreq_t *p);
    69.7 +extern int hvm_buffered_io_send(ioreq_t *p);
    69.8  extern int hvm_buffered_io_intercept(ioreq_t *p);
    69.9  
   69.10  static inline int register_portio_handler(
   69.11 @@ -145,6 +146,7 @@ static inline int irq_masked(unsigned lo
   69.12  
   69.13  extern void send_pio_req(unsigned long port, unsigned long count, int size,
   69.14                           paddr_t value, int dir, int df, int value_is_ptr);
   69.15 +void send_timeoffset_req(unsigned long timeoff);
   69.16  extern void handle_mmio(unsigned long gpa);
   69.17  extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
   69.18  extern void hvm_io_assist(struct vcpu *v);
    70.1 --- a/xen/include/asm-x86/hvm/support.h	Fri Mar 30 17:18:42 2007 -0600
    70.2 +++ b/xen/include/asm-x86/hvm/support.h	Tue Apr 03 13:04:51 2007 -0600
    70.3 @@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
    70.4  /* End of save/restore */
    70.5  
    70.6  extern char hvm_io_bitmap[];
    70.7 -extern char hvm_msr_bitmap[];
    70.8  extern int hvm_enabled;
    70.9  
   70.10  void hvm_enable(struct hvm_function_table *);
    71.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Fri Mar 30 17:18:42 2007 -0600
    71.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Apr 03 13:04:51 2007 -0600
    71.3 @@ -121,6 +121,7 @@ extern u32 vmx_vmentry_control;
    71.4  
    71.5  #define cpu_has_vmx_msr_bitmap \
    71.6      (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
    71.7 +extern char *vmx_msr_bitmap;
    71.8  
    71.9  /* VMCS Encordings */
   71.10  enum vmcs_field {
    72.1 --- a/xen/include/asm-x86/processor.h	Fri Mar 30 17:18:42 2007 -0600
    72.2 +++ b/xen/include/asm-x86/processor.h	Tue Apr 03 13:04:51 2007 -0600
    72.3 @@ -413,9 +413,9 @@ static always_inline void __mwait(unsign
    72.4  struct tss_struct {
    72.5      unsigned short	back_link,__blh;
    72.6  #ifdef __x86_64__
    72.7 -    u64 rsp0;
    72.8 -    u64 rsp1;
    72.9 -    u64 rsp2;
   72.10 +    union { u64 rsp0, esp0; };
   72.11 +    union { u64 rsp1, esp1; };
   72.12 +    union { u64 rsp2, esp2; };
   72.13      u64 reserved1;
   72.14      u64 ist[7];
   72.15      u64 reserved2;
   72.16 @@ -553,7 +553,7 @@ extern always_inline void prefetchw(cons
   72.17  
   72.18  void show_stack(struct cpu_user_regs *regs);
   72.19  void show_xen_trace(void);
   72.20 -void show_stack_overflow(unsigned long esp);
   72.21 +void show_stack_overflow(unsigned int cpu, unsigned long esp);
   72.22  void show_registers(struct cpu_user_regs *regs);
   72.23  void show_execution_state(struct cpu_user_regs *regs);
   72.24  void show_page_walk(unsigned long addr);
    73.1 --- a/xen/include/asm-x86/time.h	Fri Mar 30 17:18:42 2007 -0600
    73.2 +++ b/xen/include/asm-x86/time.h	Tue Apr 03 13:04:51 2007 -0600
    73.3 @@ -16,4 +16,9 @@ static inline cycles_t get_cycles(void)
    73.4      return c;
    73.5  }
    73.6  
    73.7 +unsigned long
    73.8 +mktime (unsigned int year, unsigned int mon,
    73.9 +        unsigned int day, unsigned int hour,
   73.10 +        unsigned int min, unsigned int sec);
   73.11 +
   73.12  #endif /* __X86_TIME_H__ */
    74.1 --- a/xen/include/public/hvm/ioreq.h	Fri Mar 30 17:18:42 2007 -0600
    74.2 +++ b/xen/include/public/hvm/ioreq.h	Tue Apr 03 13:04:51 2007 -0600
    74.3 @@ -39,6 +39,7 @@
    74.4  #define IOREQ_TYPE_XOR          4
    74.5  #define IOREQ_TYPE_XCHG         5
    74.6  #define IOREQ_TYPE_ADD          6
    74.7 +#define IOREQ_TYPE_TIMEOFFSET   7
    74.8  
    74.9  /*
   74.10   * VMExit dispatcher should cooperate with instruction decoder to
    75.1 --- a/xen/include/xen/sched.h	Fri Mar 30 17:18:42 2007 -0600
    75.2 +++ b/xen/include/xen/sched.h	Tue Apr 03 13:04:51 2007 -0600
    75.3 @@ -114,6 +114,10 @@ struct vcpu
    75.4      bool_t           nmi_pending;
    75.5      /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
    75.6      bool_t           nmi_masked;
    75.7 +    /* Require shutdown to be deferred for some asynchronous operation? */
    75.8 +    bool_t           defer_shutdown;
    75.9 +    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
   75.10 +    bool_t           paused_for_shutdown;
   75.11  
   75.12      unsigned long    pause_flags;
   75.13      atomic_t         pause_count;
   75.14 @@ -193,7 +197,9 @@ struct domain
   75.15      bool_t           is_paused_by_controller;
   75.16  
   75.17      /* Guest has shut down (inc. reason code)? */
   75.18 -    bool_t           is_shutdown;
   75.19 +    spinlock_t       shutdown_lock;
   75.20 +    bool_t           is_shutting_down; /* in process of shutting down? */
   75.21 +    bool_t           is_shut_down;     /* fully shut down? */
   75.22      int              shutdown_code;
   75.23  
   75.24      atomic_t         pause_count;
   75.25 @@ -331,8 +337,12 @@ struct domain *get_domain_by_id(domid_t 
   75.26  void domain_destroy(struct domain *d);
   75.27  void domain_kill(struct domain *d);
   75.28  void domain_shutdown(struct domain *d, u8 reason);
   75.29 +void domain_resume(struct domain *d);
   75.30  void domain_pause_for_debugger(void);
   75.31  
   75.32 +int vcpu_start_shutdown_deferral(struct vcpu *v);
   75.33 +void vcpu_end_shutdown_deferral(struct vcpu *v);
   75.34 +
   75.35  /*
   75.36   * Mark specified domain as crashed. This function always returns, even if the
   75.37   * caller is the specified domain. The domain is not synchronously descheduled