ia64/xen-unstable

changeset 11446:3bed37b2c599

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Sun Sep 10 14:52:57 2006 -0600 (2006-09-10)
parents 5791030e6473 7acaba46e15e
children a1988768828d
files
line diff
     1.1 --- a/docs/man/xm.pod.1	Sun Sep 10 14:31:54 2006 -0600
     1.2 +++ b/docs/man/xm.pod.1	Sun Sep 10 14:52:57 2006 -0600
     1.3 @@ -432,7 +432,6 @@ bug report.
     1.4  Sample xen domain info looks as follows (lines wrapped manually to
     1.5  make the man page more readable):
     1.6  
     1.7 - system                 : Linux
     1.8   host                   : talon
     1.9   release                : 2.6.12.6-xen0
    1.10   version                : #1 Mon Nov 14 14:26:26 EST 2005
    1.11 @@ -444,13 +443,14 @@ make the man page more readable):
    1.12   threads_per_core       : 1
    1.13   cpu_mhz                : 696
    1.14   hw_caps                : 0383fbff:00000000:00000000:00000040
    1.15 - memory                 : 767
    1.16 + total_memory           : 767
    1.17   free_memory            : 37
    1.18   xen_major              : 3
    1.19   xen_minor              : 0
    1.20   xen_extra              : -devel
    1.21   xen_caps               : xen-3.0-x86_32
    1.22 - xen_params             : virt_start=0xfc000000
    1.23 + xen_pagesize           : 4096
    1.24 + platform_params        : virt_start=0xfc000000
    1.25   xen_changeset          : Mon Nov 14 18:13:38 2005 +0100 
    1.26                            7793:090e44133d40
    1.27   cc_compiler            : gcc version 3.4.3 (Mandrakelinux 
    1.28 @@ -458,6 +458,7 @@ make the man page more readable):
    1.29   cc_compile_by          : sdague
    1.30   cc_compile_domain      : (none)
    1.31   cc_compile_date        : Mon Nov 14 14:16:48 EST 2005
    1.32 + xend_config_format     : 2
    1.33  
    1.34  B<FIELDS>
    1.35  
     2.1 --- a/docs/src/user.tex	Sun Sep 10 14:31:54 2006 -0600
     2.2 +++ b/docs/src/user.tex	Sun Sep 10 14:52:57 2006 -0600
     2.3 @@ -1654,26 +1654,58 @@ Now unmount (this is important!):
     2.4  
     2.5  In the configuration file set:
     2.6  \begin{quote}
     2.7 -  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
     2.8 +  \verb_disk = ['tap:aio:/full/path/to/vm1disk,sda1,w']_
     2.9  \end{quote}
    2.10  
    2.11  As the virtual machine writes to its `disk', the sparse file will be
    2.12  filled in and consume more space up to the original 2GB.
    2.13  
    2.14 -{\bf Note that file-backed VBDs may not be appropriate for backing
    2.15 -  I/O-intensive domains.}  File-backed VBDs are known to experience
    2.16 +{\em{Note:}} Users that have worked with file-backed VBDs on Xen in previous
    2.17 +versions will be interested to know that this support is now provided through
    2.18 +the blktap driver instead of the loopback driver.  This change results in
    2.19 +file-based block devices that are higher-performance, more scalable, and which
    2.20 +provide better safety properties for VBD data.  All that is required to update
    2.21 +your existing file-backed VM configurations is to change VBD configuration
    2.22 +lines from:
    2.23 +\begin{quote}
    2.24 +  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
    2.25 +\end{quote}
    2.26 +to:
    2.27 +\begin{quote}
    2.28 +  \verb_disk = ['tap:aio:/full/path/to/vm1disk,sda1,w']_
    2.29 +\end{quote}
    2.30 +
    2.31 +
    2.32 +\subsection{Loopback-mounted file-backed VBDs (deprecated)}
    2.33 +
    2.34 +{\em{{\bf{Note:}} Loopback mounted VBDs have now been replaced with
    2.35 +    blktap-based support for raw image files, as described above.  This
    2.36 +    section remains to detail a configuration that was used by older Xen
    2.37 +    versions.}}
    2.38 +
    2.39 +Raw image file-backed VBDs may also be attached to VMs using the 
    2.40 +Linux loopback driver.  The only required change to the raw file 
    2.41 +instructions above is to specify the configuration entry as:
    2.42 +\begin{quote}
    2.43 +  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
    2.44 +\end{quote}
    2.45 +
    2.46 +{\bf Note that loopback file-backed VBDs may not be appropriate for backing
    2.47 +  I/O-intensive domains.}  This approach is known to experience
    2.48  substantial slowdowns under heavy I/O workloads, due to the I/O
    2.49  handling by the loopback block device used to support file-backed VBDs
    2.50 -in dom0.  Better I/O performance can be achieved by using either
    2.51 -LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical
    2.52 -devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}).
    2.53 -
    2.54 -Linux supports a maximum of eight file-backed VBDs across all domains
    2.55 -by default.  This limit can be statically increased by using the
    2.56 -\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is
    2.57 -compiled as a module in the dom0 kernel, or by using the
    2.58 -\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled
    2.59 -directly into the dom0 kernel.
    2.60 +in dom0.  Loopback support remains for old Xen installations, and users
    2.61 +are strongly encouraged to use the blktap-based file support (using 
    2.62 +``{\tt{tap:aio}}'' as described above).
    2.63 +
    2.64 +Additionally, Linux supports a maximum of eight loopback file-backed 
    2.65 +VBDs across all domains by default.  This limit can be statically 
    2.66 +increased by using the \emph{max\_loop} module parameter if 
    2.67 +CONFIG\_BLK\_DEV\_LOOP is compiled as a module in the dom0 kernel, or 
    2.68 +by using the \emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP 
    2.69 +is compiled directly into the dom0 kernel.  Again, users are encouraged
    2.70 +to use the blktap-based file support described above which scales to a much 
    2.71 +larger number of active VBDs.
    2.72  
    2.73  
    2.74  \section{Using LVM-backed VBDs}
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Sun Sep 10 14:31:54 2006 -0600
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Sun Sep 10 14:52:57 2006 -0600
     3.3 @@ -182,17 +182,18 @@ static struct console kcons_info = {
     3.4  	.index	= -1,
     3.5  };
     3.6  
     3.7 -#define __RETCODE 0
     3.8  static int __init xen_console_init(void)
     3.9  {
    3.10  	if (!is_running_on_xen())
    3.11 -		return __RETCODE;
    3.12 +		goto out;
    3.13  
    3.14  	if (is_initial_xendomain()) {
    3.15  		if (xc_mode == XC_DEFAULT)
    3.16  			xc_mode = XC_SERIAL;
    3.17  		kcons_info.write = kcons_write_dom0;
    3.18  	} else {
    3.19 +		if (!xen_start_info->console.domU.evtchn)
    3.20 +			goto out;
    3.21  		if (xc_mode == XC_DEFAULT)
    3.22  			xc_mode = XC_TTY;
    3.23  		kcons_info.write = kcons_write;
    3.24 @@ -212,14 +213,15 @@ static int __init xen_console_init(void)
    3.25  		break;
    3.26  
    3.27  	default:
    3.28 -		return __RETCODE;
    3.29 +		goto out;
    3.30  	}
    3.31  
    3.32  	wbuf = alloc_bootmem(wbuf_size);
    3.33  
    3.34  	register_console(&kcons_info);
    3.35  
    3.36 -	return __RETCODE;
    3.37 + out:
    3.38 +	return 0;
    3.39  }
    3.40  console_initcall(xen_console_init);
    3.41  
    3.42 @@ -247,7 +249,9 @@ void xencons_force_flush(void)
    3.43  	int sz;
    3.44  
    3.45  	/* Emergency console is synchronous, so there's nothing to flush. */
    3.46 -	if (is_initial_xendomain())
    3.47 +	if (!is_running_on_xen() ||
    3.48 +	    is_initial_xendomain() ||
    3.49 +	    !xen_start_info->console.domU.evtchn)
    3.50  		return;
    3.51  
    3.52  	/* Spin until console data is flushed through to the daemon. */
    3.53 @@ -582,7 +586,11 @@ static int __init xencons_init(void)
    3.54  	if (xc_mode == XC_OFF)
    3.55  		return 0;
    3.56  
    3.57 -	xencons_ring_init();
    3.58 +	if (!is_initial_xendomain()) {
    3.59 +		rc = xencons_ring_init();
    3.60 +		if (rc)
    3.61 +			return rc;
    3.62 +	}
    3.63  
    3.64  	xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ?
    3.65  					  1 : MAX_NR_CONSOLES);
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Sun Sep 10 14:31:54 2006 -0600
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Sun Sep 10 14:52:57 2006 -0600
     4.3 @@ -110,24 +110,26 @@ static irqreturn_t handle_input(int irq,
     4.4  
     4.5  int xencons_ring_init(void)
     4.6  {
     4.7 -	int err;
     4.8 +	int irq;
     4.9  
    4.10  	if (xencons_irq)
    4.11  		unbind_from_irqhandler(xencons_irq, NULL);
    4.12  	xencons_irq = 0;
    4.13  
    4.14 -	if (!xen_start_info->console.domU.evtchn)
    4.15 -		return 0;
    4.16 +	if (!is_running_on_xen() ||
    4.17 +	    is_initial_xendomain() ||
    4.18 +	    !xen_start_info->console.domU.evtchn)
    4.19 +		return -ENODEV;
    4.20  
    4.21 -	err = bind_evtchn_to_irqhandler(
    4.22 +	irq = bind_evtchn_to_irqhandler(
    4.23  		xen_start_info->console.domU.evtchn,
    4.24  		handle_input, 0, "xencons", NULL);
    4.25 -	if (err <= 0) {
    4.26 -		printk(KERN_ERR "XEN console request irq failed %i\n", err);
    4.27 -		return err;
    4.28 +	if (irq < 0) {
    4.29 +		printk(KERN_ERR "XEN console request irq failed %i\n", irq);
    4.30 +		return irq;
    4.31  	}
    4.32  
    4.33 -	xencons_irq = err;
    4.34 +	xencons_irq = irq;
    4.35  
    4.36  	/* In case we have in-flight data after save/restore... */
    4.37  	notify_daemon();
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Sun Sep 10 14:31:54 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Sun Sep 10 14:52:57 2006 -0600
     5.3 @@ -427,6 +427,14 @@ static int connect_rings(struct backend_
     5.4  		be->netif->dev->features |= NETIF_F_TSO;
     5.5  	}
     5.6  
     5.7 +	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
     5.8 +			 "%d", &val) < 0)
     5.9 +		val = 0;
    5.10 +	if (val) {
    5.11 +		be->netif->features &= ~NETIF_F_IP_CSUM;
    5.12 +		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
    5.13 +	}
    5.14 +
    5.15  	/* Map the shared frame, irq etc. */
    5.16  	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
    5.17  	if (err) {
     6.1 --- a/tools/blktap/drivers/block-qcow.c	Sun Sep 10 14:31:54 2006 -0600
     6.2 +++ b/tools/blktap/drivers/block-qcow.c	Sun Sep 10 14:52:57 2006 -0600
     6.3 @@ -237,6 +237,25 @@ static uint32_t gen_cksum(char *ptr, int
     6.4  	return ret;
     6.5  }
     6.6  
     6.7 +static int get_filesize(char *filename, uint64_t *size, struct stat *st)
     6.8 +{
     6.9 +	int blockfd;
    6.10 +
    6.11 +	/*Set to the backing file size*/
    6.12 +	if(S_ISBLK(st->st_mode)) {
    6.13 +		blockfd = open(filename, O_RDONLY);
    6.14 +		if (blockfd < 0)
    6.15 +			return -1;
    6.16 +		if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
    6.17 +			printf("Unable to get Block device size\n");
    6.18 +			close(blockfd);
    6.19 +			return -1;
    6.20 +		}
    6.21 +		close(blockfd);
    6.22 +	} else *size = (st->st_size >> SECTOR_SHIFT);	
    6.23 +	return 0;
    6.24 +}
    6.25 +
    6.26  static int qcow_set_key(struct td_state *bs, const char *key)
    6.27  {
    6.28  	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
    6.29 @@ -1204,12 +1223,14 @@ int qcow_create(const char *filename, ui
    6.30  			header_size += backing_filename_len;
    6.31  			
    6.32  			/*Set to the backing file size*/
    6.33 -			size = (st.st_size >> SECTOR_SHIFT);
    6.34 +			if(get_filesize(backing_filename, &size, &st)) {
    6.35 +				return -1;
    6.36 +			}
    6.37  			DPRINTF("Backing file size detected: %lld sectors" 
    6.38  				"(total %lld [%lld MB])\n", 
    6.39 -				(long long)total_size, 
    6.40 -				(long long)(total_size << SECTOR_SHIFT), 
    6.41 -				(long long)(total_size >> 11));
    6.42 +				(long long)size, 
    6.43 +				(long long)(size << SECTOR_SHIFT), 
    6.44 +				(long long)(size >> 11));
    6.45  		} else {
    6.46  			backing_file = NULL;
    6.47  			DPRINTF("Setting file size: %lld (total %lld)\n", 
     7.1 --- a/tools/blktap/drivers/tapdisk.c	Sun Sep 10 14:31:54 2006 -0600
     7.2 +++ b/tools/blktap/drivers/tapdisk.c	Sun Sep 10 14:52:57 2006 -0600
     7.3 @@ -127,18 +127,15 @@ void sig_handler(int sig)
     7.4  static inline int LOCAL_FD_SET(fd_set *readfds)
     7.5  {
     7.6  	fd_list_entry_t *ptr;
     7.7 -	int i;
     7.8  
     7.9  	ptr = fd_start;
    7.10  	while (ptr != NULL) {
    7.11  		if (ptr->tap_fd) {
    7.12  			FD_SET(ptr->tap_fd, readfds);
    7.13 -			for (i = 0; i < MAX_IOFD; i++) {
    7.14 -				if (ptr->io_fd[i]) 
    7.15 -					FD_SET(ptr->io_fd[i], readfds);
    7.16 -				maxfds = (ptr->io_fd[i] > maxfds ? 
    7.17 -					  ptr->io_fd[i]: maxfds);
    7.18 -			}
    7.19 +			if (ptr->io_fd[READ]) 
    7.20 +				FD_SET(ptr->io_fd[READ], readfds);
    7.21 +			maxfds = (ptr->io_fd[READ] > maxfds ? 
    7.22 +					ptr->io_fd[READ]: maxfds);
    7.23  			maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds);
    7.24  		}
    7.25  		ptr = ptr->next;
    7.26 @@ -580,7 +577,7 @@ static void get_io_request(struct td_sta
    7.27  
    7.28  int main(int argc, char *argv[])
    7.29  {
    7.30 -	int len, msglen, ret, i;
    7.31 +	int len, msglen, ret;
    7.32  	char *p, *buf;
    7.33  	fd_set readfds, writefds;
    7.34  	struct timeval timeout;
    7.35 @@ -633,16 +630,14 @@ int main(int argc, char *argv[])
    7.36  			     (fd_set *) 0, &timeout);
    7.37  
    7.38  		if (ret > 0) 
    7.39 -                {
    7.40 +		{
    7.41  			ptr = fd_start;
    7.42  			while (ptr != NULL) {
    7.43  				if (FD_ISSET(ptr->tap_fd, &readfds)) 
    7.44  					get_io_request(ptr->s);
    7.45 -				for (i = 0; i < MAX_IOFD; i++) {
    7.46 -					if (ptr->io_fd[i] && 
    7.47 -					   FD_ISSET(ptr->io_fd[i], &readfds)) 
    7.48 -						io_done(ptr->s, i);
    7.49 -				}
    7.50 +				if (ptr->io_fd[READ] && 
    7.51 +						FD_ISSET(ptr->io_fd[READ], &readfds)) 
    7.52 +					io_done(ptr->s, READ);
    7.53  
    7.54  				ptr = ptr->next;
    7.55  			}
     8.1 --- a/tools/console/client/main.c	Sun Sep 10 14:31:54 2006 -0600
     8.2 +++ b/tools/console/client/main.c	Sun Sep 10 14:52:57 2006 -0600
     8.3 @@ -220,7 +220,8 @@ int main(int argc, char **argv)
     8.4  	   user friendly, we'll bail out here since no data will ever show
     8.5  	   up on domain-0. */
     8.6  	if (domid == 0) {
     8.7 -		err(errno, "Could not read tty from store");
     8.8 +		fprintf(stderr, "Can't specify Domain-0\n");
     8.9 +		exit(EINVAL);
    8.10  	}
    8.11  
    8.12  	/* Wait a little bit for tty to appear.  There is a race
     9.1 --- a/tools/examples/vtpm-common.sh	Sun Sep 10 14:31:54 2006 -0600
     9.2 +++ b/tools/examples/vtpm-common.sh	Sun Sep 10 14:52:57 2006 -0600
     9.3 @@ -48,6 +48,9 @@ else
     9.4  	function vtpm_migrate() {
     9.5  		echo "Error: vTPM migration accross machines not implemented."
     9.6  	}
     9.7 +	function vtpm_migrate_local() {
     9.8 +		echo "Error: local vTPM migration not supported"
     9.9 +	}
    9.10  	function vtpm_migrate_recover() {
    9.11  		true
    9.12  	}
    9.13 @@ -353,6 +356,8 @@ function vtpm_migration_step() {
    9.14  	local res=$(vtpm_isLocalAddress $1)
    9.15  	if [ "$res" == "0" ]; then
    9.16  		vtpm_migrate $1 $2 $3
    9.17 +	else
    9.18 +		vtpm_migrate_local
    9.19  	fi
    9.20  }
    9.21  
    10.1 --- a/tools/examples/vtpm-impl	Sun Sep 10 14:31:54 2006 -0600
    10.2 +++ b/tools/examples/vtpm-impl	Sun Sep 10 14:52:57 2006 -0600
    10.3 @@ -184,3 +184,6 @@ function vtpm_migrate_recover() {
    10.4   echo "Error: Recovery not supported yet" 
    10.5  }
    10.6  
    10.7 +function vtpm_migrate_local() {
    10.8 + echo "Error: local vTPM migration not supported"
    10.9 +}
    11.1 --- a/tools/python/xen/xend/XendDomain.py	Sun Sep 10 14:31:54 2006 -0600
    11.2 +++ b/tools/python/xen/xend/XendDomain.py	Sun Sep 10 14:52:57 2006 -0600
    11.3 @@ -420,6 +420,10 @@ class XendDomain:
    11.4          """ The following call may raise a XendError exception """
    11.5          dominfo.testMigrateDevices(True, dst)
    11.6  
    11.7 +        if live:
    11.8 +            """ Make sure there's memory free for enabling shadow mode """
    11.9 +            dominfo.checkLiveMigrateMemory()
   11.10 +
   11.11          if port == 0:
   11.12              port = xroot.get_xend_relocation_port()
   11.13          try:
    12.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Sun Sep 10 14:31:54 2006 -0600
    12.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Sun Sep 10 14:52:57 2006 -0600
    12.3 @@ -49,6 +49,7 @@ from xen.xend.xenstore.xstransact import
    12.4  from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain
    12.5  from xen.xend.xenstore.xswatch import xswatch
    12.6  
    12.7 +from xen.xend import arch
    12.8  
    12.9  """Shutdown code for poweroff."""
   12.10  DOMAIN_POWEROFF = 0
   12.11 @@ -1087,6 +1088,15 @@ class XendDomainInfo:
   12.12      ## public:
   12.13  
   12.14      def destroyDevice(self, deviceClass, devid):
   12.15 +	if type(devid) is str:
   12.16 +	    devicePath = '%s/device/%s' % (self.dompath, deviceClass)
   12.17 +	    for entry in xstransact.List(devicePath):
   12.18 +		backend = xstransact.Read('%s/%s' % (devicePath, entry), "backend")
   12.19 +		devName = xstransact.Read(backend, "dev")
   12.20 +		if devName == devid:
   12.21 +		    # We found the integer matching our devid, use it instead
   12.22 +		    devid = entry
   12.23 +        	    break
   12.24          return self.getDeviceController(deviceClass).destroyDevice(devid)
   12.25  
   12.26  
   12.27 @@ -1285,28 +1295,37 @@ class XendDomainInfo:
   12.28                  for v in range(0, self.info['max_vcpu_id']+1):
   12.29                      xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
   12.30  
   12.31 +            # Use architecture- and image-specific calculations to determine
   12.32 +            # the various headrooms necessary, given the raw configured
   12.33 +            # values.
   12.34 +            # reservation, maxmem, memory, and shadow are all in KiB.
   12.35 +            reservation = self.image.getRequiredInitialReservation(
   12.36 +                self.info['memory'] * 1024)
   12.37 +            maxmem = self.image.getRequiredAvailableMemory(
   12.38 +                self.info['maxmem'] * 1024)
   12.39 +            memory = self.image.getRequiredAvailableMemory(
   12.40 +                self.info['memory'] * 1024)
   12.41 +            shadow = self.image.getRequiredShadowMemory(
   12.42 +                self.info['shadow_memory'] * 1024,
   12.43 +                self.info['maxmem'] * 1024)
   12.44 +
   12.45 +            # Round shadow up to a multiple of a MiB, as shadow_mem_control
   12.46 +            # takes MiB and we must not round down and end up under-providing.
   12.47 +            shadow = ((shadow + 1023) / 1024) * 1024
   12.48 +
   12.49              # set memory limit
   12.50 -            maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
   12.51              xc.domain_setmaxmem(self.domid, maxmem)
   12.52  
   12.53 -            mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
   12.54 -
   12.55 -            # get the domain's shadow memory requirement
   12.56 -            shadow_kb = self.image.getRequiredShadowMemory(mem_kb)
   12.57 -            shadow_kb_req = self.info['shadow_memory'] * 1024
   12.58 -            if shadow_kb_req > shadow_kb:
   12.59 -                shadow_kb = shadow_kb_req
   12.60 -            shadow_mb = (shadow_kb + 1023) / 1024
   12.61 -
   12.62              # Make sure there's enough RAM available for the domain
   12.63 -            balloon.free(mem_kb + shadow_mb * 1024)
   12.64 +            balloon.free(memory + shadow)
   12.65  
   12.66              # Set up the shadow memory
   12.67 -            shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb)
   12.68 +            shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
   12.69              self.info['shadow_memory'] = shadow_cur
   12.70  
   12.71 -            # initial memory allocation
   12.72 -            xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
   12.73 +            # initial memory reservation
   12.74 +            xc.domain_memory_increase_reservation(self.domid, reservation, 0,
   12.75 +                                                  0)
   12.76  
   12.77              self.createChannels()
   12.78  
   12.79 @@ -1485,6 +1504,19 @@ class XendDomainInfo:
   12.80  
   12.81      ## public:
   12.82  
   12.83 +    def checkLiveMigrateMemory(self):
   12.84 +        """ Make sure there's enough memory to migrate this domain """
   12.85 +        overhead_kb = 0
   12.86 +        if arch.type == "x86":
   12.87 +            # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
   12.88 +            # the minimum that Xen would allocate if no value were given.
   12.89 +            overhead_kb = self.info['vcpus'] * 1024 + self.info['maxmem'] * 4
   12.90 +            overhead_kb = ((overhead_kb + 1023) / 1024) * 1024
   12.91 +            # The domain might already have some shadow memory
   12.92 +            overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
   12.93 +        if overhead_kb > 0:
   12.94 +            balloon.free(overhead_kb)
   12.95 +
   12.96      def testMigrateDevices(self, network, dst):
   12.97          """ Notify all device about intention of migration
   12.98          @raise: XendError for a device that cannot be migrated
    13.1 --- a/tools/python/xen/xend/image.py	Sun Sep 10 14:31:54 2006 -0600
    13.2 +++ b/tools/python/xen/xend/image.py	Sun Sep 10 14:52:57 2006 -0600
    13.3 @@ -143,12 +143,27 @@ class ImageHandler:
    13.4              raise VmError('Building domain failed: ostype=%s dom=%d err=%s'
    13.5                            % (self.ostype, self.vm.getDomid(), str(result)))
    13.6  
    13.7 -    def getRequiredMemory(self, mem_kb):
    13.8 +    def getRequiredAvailableMemory(self, mem_kb):
    13.9 +        """@param mem_kb The configured maxmem or memory, in KiB.
   13.10 +        @return The corresponding required amount of memory for the domain,
   13.11 +        also in KiB.  This is normally the given mem_kb, but architecture- or
   13.12 +        image-specific code may override this to add headroom where
   13.13 +        necessary."""
   13.14          return mem_kb
   13.15  
   13.16 -    def getRequiredShadowMemory(self, mem_kb):
   13.17 -        """@return The minimum shadow memory required, in KiB, for a domain 
   13.18 -        with mem_kb KiB of RAM."""
   13.19 +    def getRequiredInitialReservation(self, mem_kb):
   13.20 +        """@param mem_kb The configured memory, in KiB.
   13.21 +        @return The corresponding required amount of memory to be free, also
   13.22 +        in KiB. This is normally the same as getRequiredAvailableMemory, but
   13.23 +        architecture- or image-specific code may override this to
   13.24 +        add headroom where necessary."""
   13.25 +        return self.getRequiredAvailableMemory(mem_kb)
   13.26 +
   13.27 +    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
   13.28 +        """@param shadow_mem_kb The configured shadow memory, in KiB.
   13.29 +        @param maxmem_kb The configured maxmem, in KiB.
   13.30 +        @return The corresponding required amount of shadow memory, also in
   13.31 +        KiB."""
   13.32          # PV domains don't need any shadow memory
   13.33          return 0
   13.34  
   13.35 @@ -418,7 +433,7 @@ class IA64_HVM_ImageHandler(HVMImageHand
   13.36  
   13.37      ostype = "hvm"
   13.38  
   13.39 -    def getRequiredMemory(self, mem_kb):
   13.40 +    def getRequiredAvailableMemory(self, mem_kb):
   13.41          page_kb = 16
   13.42          # ROM size for guest firmware, ioreq page and xenstore page
   13.43          extra_pages = 1024 + 2
   13.44 @@ -432,19 +447,29 @@ class X86_HVM_ImageHandler(HVMImageHandl
   13.45  
   13.46      ostype = "hvm"
   13.47  
   13.48 -    def getRequiredMemory(self, mem_kb):
   13.49 +    def getRequiredAvailableMemory(self, mem_kb):
   13.50 +        # Add 8 MiB overhead for QEMU's video RAM.
   13.51 +        return self.getRequiredInitialReservation(mem_kb) + 8192
   13.52 +
   13.53 +    def getRequiredInitialReservation(self, mem_kb):
   13.54          page_kb = 4
   13.55          # This was derived emperically:
   13.56 -        #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
   13.57 +        #   2.4 MB overhead per 1024 MB RAM
   13.58          #   + 4 to avoid low-memory condition
   13.59 -        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
   13.60 +        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 4;
   13.61          extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
   13.62          return mem_kb + extra_pages * page_kb
   13.63  
   13.64 -    def getRequiredShadowMemory(self, mem_kb):
   13.65 +    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
   13.66 +        # The given value is the configured value -- we need to include the
   13.67 +        # overhead due to getRequiredInitialReservation.
   13.68 +        maxmem_kb = self.getRequiredInitialReservation(maxmem_kb)
   13.69 +
   13.70          # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
   13.71          # the minimum that Xen would allocate if no value were given.
   13.72 -        return 1024 * self.vm.getVCpuCount() + mem_kb / 256
   13.73 +        return max(1024 * self.vm.getVCpuCount() + maxmem_kb / 256,
   13.74 +                   shadow_mem_kb)
   13.75 +
   13.76  
   13.77  _handlers = {
   13.78      "powerpc": {
    14.1 --- a/tools/python/xen/xend/server/pciquirk.py	Sun Sep 10 14:31:54 2006 -0600
    14.2 +++ b/tools/python/xen/xend/server/pciquirk.py	Sun Sep 10 14:52:57 2006 -0600
    14.3 @@ -1,5 +1,5 @@
    14.4  from xen.xend.XendLogging import log
    14.5 -from xen.xend.XendError import XendError
    14.6 +from xen.xend.XendError import XendError, VmError
    14.7  import sys
    14.8  import os.path
    14.9  from xen.xend.sxp import *
    15.1 --- a/tools/python/xen/xm/create.py	Sun Sep 10 14:31:54 2006 -0600
    15.2 +++ b/tools/python/xen/xm/create.py	Sun Sep 10 14:52:57 2006 -0600
    15.3 @@ -1169,6 +1169,9 @@ def main(argv):
    15.4      if not opts:
    15.5          return
    15.6  
    15.7 +    if type(config) == str:
    15.8 +            config = sxp.parse(file(config))[0]
    15.9 +
   15.10      if opts.vals.dryrun:
   15.11          PrettyPrint.prettyprint(config)
   15.12      else:
    16.1 --- a/tools/python/xen/xm/tests/test_create.py	Sun Sep 10 14:31:54 2006 -0600
    16.2 +++ b/tools/python/xen/xm/tests/test_create.py	Sun Sep 10 14:52:57 2006 -0600
    16.3 @@ -51,6 +51,7 @@ class test_create(unittest.TestCase):
    16.4                   'path'      : '.:/etc/xen',
    16.5                   'builder'   : 'linux',
    16.6                   'nics'      : -1,
    16.7 +                 'vncunused' : 1,
    16.8                   'xauthority': xen.xm.create.get_xauthority(),
    16.9                   })
   16.10  
   16.11 @@ -101,6 +102,7 @@ on_crash    = 'destroy'
   16.12                   'path'        : '.:/etc/xen',
   16.13                   'builder'     : 'linux',
   16.14  
   16.15 +                 'vncunused'   : 1,
   16.16                   'xauthority'  : xen.xm.create.get_xauthority(),
   16.17                 })
   16.18  
   16.19 @@ -140,6 +142,7 @@ cpu_weight = 0.75
   16.20                   'builder'    : 'linux',
   16.21                   'nics'       : -1,
   16.22  
   16.23 +                 'vncunused'   : 1,
   16.24                   'xauthority' : xen.xm.create.get_xauthority(),
   16.25                   })
   16.26              
   16.27 @@ -182,6 +185,7 @@ ne2000=0
   16.28                                     xen.xm.create.VNC_BASE_PORT +
   16.29                                     xen.xm.create.choose_vnc_display())),
   16.30                   'vnc'         : 1,
   16.31 +                 'vncunused'   : 1,
   16.32                   'vncviewer'   : 1,
   16.33  
   16.34                   'xm_file'     : fname,
    17.1 --- a/tools/xenmon/xenmon.py	Sun Sep 10 14:31:54 2006 -0600
    17.2 +++ b/tools/xenmon/xenmon.py	Sun Sep 10 14:52:57 2006 -0600
    17.3 @@ -672,6 +672,9 @@ def main():
    17.4  
    17.5      parser = setup_cmdline_parser()
    17.6      (options, args) = parser.parse_args()
    17.7 +    if options.mspersample < 0:
    17.8 +        parser.error("option --ms_per_sample: invalid negative value: '%d'" %
    17.9 +                     options.mspersample)
   17.10      
   17.11      start_xenbaked()
   17.12      if options.live:
    18.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Sun Sep 10 14:31:54 2006 -0600
    18.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Sun Sep 10 14:52:57 2006 -0600
    18.3 @@ -17,6 +17,7 @@
    18.4   * Place - Suite 330, Boston, MA 02111-1307 USA.
    18.5   *
    18.6   */
    18.7 +
    18.8  #include <linux/module.h>
    18.9  #include <linux/kernel.h>
   18.10  #include <linux/sched.h>
   18.11 @@ -25,6 +26,8 @@
   18.12  #include <linux/init.h>
   18.13  #include <linux/version.h>
   18.14  #include <linux/interrupt.h>
   18.15 +#include <linux/vmalloc.h>
   18.16 +#include <linux/mm.h>
   18.17  #include <asm/system.h>
   18.18  #include <asm/io.h>
   18.19  #include <asm/irq.h>
   18.20 @@ -47,7 +50,6 @@ MODULE_AUTHOR("ssmith@xensource.com");
   18.21  MODULE_DESCRIPTION("Xen platform PCI device");
   18.22  MODULE_LICENSE("GPL");
   18.23  
   18.24 -
   18.25  unsigned long *phys_to_machine_mapping;
   18.26  EXPORT_SYMBOL(phys_to_machine_mapping);
   18.27  
   18.28 @@ -118,7 +120,7 @@ unsigned long alloc_xen_mmio(unsigned lo
   18.29  /* Lifted from hvmloader.c */
   18.30  static int get_hypercall_stubs(void)
   18.31  {
   18.32 -	uint32_t eax, ebx, ecx, edx, pages, msr, order, i;
   18.33 +	uint32_t eax, ebx, ecx, edx, pages, msr, i;
   18.34  	char signature[13];
   18.35  
   18.36  	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
   18.37 @@ -141,22 +143,22 @@ static int get_hypercall_stubs(void)
   18.38  
   18.39  	cpuid(0x40000002, &pages, &msr, &ecx, &edx);
   18.40  
   18.41 -	i = pages - 1;
   18.42 -	for (order = 0; i != 0; order++)
   18.43 -		i >>= 1;
   18.44 +	printk(KERN_INFO "Hypercall area is %u pages.\n", pages);
   18.45  
   18.46 -	printk(KERN_INFO "Hypercall area is %u pages (order %u allocation)\n",
   18.47 -	       pages, order);
   18.48 -
   18.49 -	hypercall_stubs = (void *)__get_free_pages(GFP_KERNEL, order);
   18.50 +	/* Use __vmalloc() because vmalloc_exec() is not an exported symbol. */
   18.51 +	/* PAGE_KERNEL_EXEC also is not exported, hence we use PAGE_KERNEL. */
   18.52 +	/* hypercall_stubs = vmalloc_exec(pages * PAGE_SIZE); */
   18.53 +	hypercall_stubs = __vmalloc(pages * PAGE_SIZE,
   18.54 +				    GFP_KERNEL | __GFP_HIGHMEM,
   18.55 +				    __pgprot(__PAGE_KERNEL & ~_PAGE_NX));
   18.56  	if (hypercall_stubs == NULL)
   18.57  		return -ENOMEM;
   18.58  
   18.59 -	for (i = 0; i < pages; i++)
   18.60 -		wrmsrl(ebx,
   18.61 -		       virt_to_phys(hypercall_stubs) +	/* base address      */
   18.62 -		       (i << PAGE_SHIFT) +		/* offset of page @i */
   18.63 -		       i);				/* request page @i   */
   18.64 +	for (i = 0; i < pages; i++) {
   18.65 +		unsigned long pfn;
   18.66 +		pfn = vmalloc_to_pfn((char *)hypercall_stubs + i*PAGE_SIZE);
   18.67 +		wrmsrl(msr, ((u64)pfn << PAGE_SHIFT) + i);
   18.68 +	}
   18.69  
   18.70  	return 0;
   18.71  }
    19.1 --- a/xen/arch/x86/mm/shadow/multi.c	Sun Sep 10 14:31:54 2006 -0600
    19.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Sun Sep 10 14:52:57 2006 -0600
    19.3 @@ -1792,8 +1792,10 @@ void sh_install_xen_entries_in_l2h(struc
    19.4          for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
    19.5          {
    19.6              sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
    19.7 -                shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
    19.8 -                                    __PAGE_HYPERVISOR);
    19.9 +                (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
   19.10 +                ? shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
   19.11 +                                      __PAGE_HYPERVISOR)
   19.12 +                : shadow_l2e_empty();
   19.13          }
   19.14          sh_unmap_domain_page(p2m);
   19.15      }
    20.1 --- a/xen/common/grant_table.c	Sun Sep 10 14:31:54 2006 -0600
    20.2 +++ b/xen/common/grant_table.c	Sun Sep 10 14:52:57 2006 -0600
    20.3 @@ -90,11 +90,8 @@ static void
    20.4      unsigned long  frame = 0;
    20.5      int            rc = GNTST_okay;
    20.6      struct active_grant_entry *act;
    20.7 -
    20.8 -    /* Entry details from @rd's shared grant table. */
    20.9      grant_entry_t *sha;
   20.10 -    domid_t        sdom;
   20.11 -    u16            sflags;
   20.12 +    union grant_combo scombo, prev_scombo, new_scombo;
   20.13  
   20.14      /*
   20.15       * We bound the number of times we retry CMPXCHG on memory locations that
   20.16 @@ -159,7 +156,10 @@ static void
   20.17  
   20.18          memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
   20.19          for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
   20.20 +        {
   20.21              new_mt[i].ref = i+1;
   20.22 +            new_mt[i].flags = 0;
   20.23 +        }
   20.24  
   20.25          free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
   20.26          lgt->maptrack          = new_mt;
   20.27 @@ -175,12 +175,19 @@ static void
   20.28  
   20.29      spin_lock(&rd->grant_table->lock);
   20.30  
   20.31 +    /* If already pinned, check the active domid and avoid refcnt overflow. */
   20.32 +    if ( act->pin &&
   20.33 +         ((act->domid != ld->domain_id) ||
   20.34 +          (act->pin & 0x80808080U) != 0) )
   20.35 +        PIN_FAIL(unlock_out, GNTST_general_error,
   20.36 +                 "Bad domain (%d != %d), or risk of counter overflow %08x\n",
   20.37 +                 act->domid, ld->domain_id, act->pin);
   20.38 +
   20.39      if ( !act->pin ||
   20.40           (!(op->flags & GNTMAP_readonly) &&
   20.41            !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
   20.42      {
   20.43 -        sflags = sha->flags;
   20.44 -        sdom   = sha->domid;
   20.45 +        scombo.word = *(u32 *)&sha->flags;
   20.46  
   20.47          /*
   20.48           * This loop attempts to set the access (reading/writing) flags
   20.49 @@ -190,33 +197,29 @@ static void
   20.50           */
   20.51          for ( ; ; )
   20.52          {
   20.53 -            union grant_combo scombo, prev_scombo, new_scombo;
   20.54 +            /* If not already pinned, check the grant domid and type. */
   20.55 +            if ( !act->pin &&
   20.56 +                 (((scombo.shorts.flags & GTF_type_mask) !=
   20.57 +                   GTF_permit_access) ||
   20.58 +                  (scombo.shorts.domid != ld->domain_id)) )
   20.59 +                 PIN_FAIL(unlock_out, GNTST_general_error,
   20.60 +                          "Bad flags (%x) or dom (%d). (expected dom %d)\n",
   20.61 +                          scombo.shorts.flags, scombo.shorts.domid,
   20.62 +                          ld->domain_id);
   20.63  
   20.64 -            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
   20.65 -                 unlikely(sdom != led->domain->domain_id) )
   20.66 -                PIN_FAIL(unlock_out, GNTST_general_error,
   20.67 -                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   20.68 -                         sflags, sdom, led->domain->domain_id);
   20.69 -
   20.70 -            /* Merge two 16-bit values into a 32-bit combined update. */
   20.71 -            scombo.shorts.flags = sflags;
   20.72 -            scombo.shorts.domid = sdom;
   20.73 -            
   20.74              new_scombo = scombo;
   20.75              new_scombo.shorts.flags |= GTF_reading;
   20.76  
   20.77              if ( !(op->flags & GNTMAP_readonly) )
   20.78              {
   20.79                  new_scombo.shorts.flags |= GTF_writing;
   20.80 -                if ( unlikely(sflags & GTF_readonly) )
   20.81 +                if ( unlikely(scombo.shorts.flags & GTF_readonly) )
   20.82                      PIN_FAIL(unlock_out, GNTST_general_error,
   20.83                               "Attempt to write-pin a r/o grant entry.\n");
   20.84              }
   20.85  
   20.86              prev_scombo.word = cmpxchg((u32 *)&sha->flags,
   20.87                                         scombo.word, new_scombo.word);
   20.88 -
   20.89 -            /* Did the combined update work (did we see what we expected?). */
   20.90              if ( likely(prev_scombo.word == scombo.word) )
   20.91                  break;
   20.92  
   20.93 @@ -224,20 +227,15 @@ static void
   20.94                  PIN_FAIL(unlock_out, GNTST_general_error,
   20.95                           "Shared grant entry is unstable.\n");
   20.96  
   20.97 -            /* Didn't see what we expected. Split out the seen flags & dom. */
   20.98 -            sflags = prev_scombo.shorts.flags;
   20.99 -            sdom   = prev_scombo.shorts.domid;
  20.100 +            scombo = prev_scombo;
  20.101          }
  20.102  
  20.103          if ( !act->pin )
  20.104          {
  20.105 -            act->domid = sdom;
  20.106 +            act->domid = scombo.shorts.domid;
  20.107              act->frame = gmfn_to_mfn(rd, sha->frame);
  20.108          }
  20.109      }
  20.110 -    else if ( (act->pin & 0x80808080U) != 0 )
  20.111 -        PIN_FAIL(unlock_out, ENOSPC,
  20.112 -                 "Risk of counter overflow %08x\n", act->pin);
  20.113  
  20.114      if ( op->flags & GNTMAP_device_map )
  20.115          act->pin += (op->flags & GNTMAP_readonly) ?
  20.116 @@ -545,9 +543,7 @@ gnttab_prepare_for_transfer(
  20.117  {
  20.118      struct grant_table *rgt;
  20.119      struct grant_entry *sha;
  20.120 -    domid_t             sdom;
  20.121 -    u16                 sflags;
  20.122 -    union grant_combo   scombo, prev_scombo, tmp_scombo;
  20.123 +    union grant_combo   scombo, prev_scombo, new_scombo;
  20.124      int                 retries = 0;
  20.125  
  20.126      if ( unlikely((rgt = rd->grant_table) == NULL) ||
  20.127 @@ -562,29 +558,24 @@ gnttab_prepare_for_transfer(
  20.128  
  20.129      sha = &rgt->shared[ref];
  20.130      
  20.131 -    sflags = sha->flags;
  20.132 -    sdom   = sha->domid;
  20.133 +    scombo.word = *(u32 *)&sha->flags;
  20.134  
  20.135      for ( ; ; )
  20.136      {
  20.137 -        if ( unlikely(sflags != GTF_accept_transfer) ||
  20.138 -             unlikely(sdom != ld->domain_id) )
  20.139 +        if ( unlikely(scombo.shorts.flags != GTF_accept_transfer) ||
  20.140 +             unlikely(scombo.shorts.domid != ld->domain_id) )
  20.141          {
  20.142              DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
  20.143 -                    sflags, sdom, ld->domain_id);
  20.144 +                    scombo.shorts.flags, scombo.shorts.domid,
  20.145 +                    ld->domain_id);
  20.146              goto fail;
  20.147          }
  20.148  
  20.149 -        /* Merge two 16-bit values into a 32-bit combined update. */
  20.150 -        scombo.shorts.flags = sflags;
  20.151 -        scombo.shorts.domid = sdom;
  20.152 +        new_scombo = scombo;
  20.153 +        new_scombo.shorts.flags |= GTF_transfer_committed;
  20.154  
  20.155 -        tmp_scombo = scombo;
  20.156 -        tmp_scombo.shorts.flags |= GTF_transfer_committed;
  20.157          prev_scombo.word = cmpxchg((u32 *)&sha->flags,
  20.158 -                                   scombo.word, tmp_scombo.word);
  20.159 -
  20.160 -        /* Did the combined update work (did we see what we expected?). */
  20.161 +                                   scombo.word, new_scombo.word);
  20.162          if ( likely(prev_scombo.word == scombo.word) )
  20.163              break;
  20.164  
  20.165 @@ -594,9 +585,7 @@ gnttab_prepare_for_transfer(
  20.166              goto fail;
  20.167          }
  20.168  
  20.169 -        /* Didn't see what we expected. Split out the seen flags & dom. */
  20.170 -        sflags = prev_scombo.shorts.flags;
  20.171 -        sdom   = prev_scombo.shorts.domid;
  20.172 +        scombo = prev_scombo;
  20.173      }
  20.174  
  20.175      spin_unlock(&rgt->lock);
  20.176 @@ -734,16 +723,21 @@ static void
  20.177          gnttab_mark_dirty(rd, r_frame);
  20.178  
  20.179      spin_lock(&rd->grant_table->lock);
  20.180 +
  20.181      if ( readonly )
  20.182 +    {
  20.183          act->pin -= GNTPIN_hstr_inc;
  20.184 +    }
  20.185      else
  20.186 +    {
  20.187          act->pin -= GNTPIN_hstw_inc;
  20.188 -
  20.189 -    if ( !(act->pin & GNTPIN_hstw_mask) && !readonly )
  20.190 -        gnttab_clear_flag(_GTF_writing, &sha->flags);
  20.191 +        if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
  20.192 +            gnttab_clear_flag(_GTF_writing, &sha->flags);
  20.193 +    }
  20.194  
  20.195      if ( !act->pin )
  20.196          gnttab_clear_flag(_GTF_reading, &sha->flags);
  20.197 +
  20.198      spin_unlock(&rd->grant_table->lock);
  20.199  }
  20.200  
  20.201 @@ -759,8 +753,7 @@ static int
  20.202      struct active_grant_entry *act;
  20.203      s16 rc = GNTST_okay;
  20.204      int retries = 0;
  20.205 -    u16 sflags;
  20.206 -    domid_t sdom;
  20.207 +    union grant_combo scombo, prev_scombo, new_scombo;
  20.208  
  20.209      if ( unlikely(gref >= NR_GRANT_ENTRIES) )
  20.210          PIN_FAIL(error_out, GNTST_bad_gntref,
  20.211 @@ -771,36 +764,42 @@ static int
  20.212  
  20.213      spin_lock(&rd->grant_table->lock);
  20.214      
  20.215 +    /* If already pinned, check the active domid and avoid refcnt overflow. */
  20.216 +    if ( act->pin &&
  20.217 +         ((act->domid != current->domain->domain_id) ||
  20.218 +          (act->pin & 0x80808080U) != 0) )
  20.219 +        PIN_FAIL(unlock_out, GNTST_general_error,
  20.220 +                 "Bad domain (%d != %d), or risk of counter overflow %08x\n",
  20.221 +                 act->domid, current->domain->domain_id, act->pin);
  20.222 +
  20.223      if ( !act->pin ||
  20.224 -         (!readonly && !(act->pin & GNTPIN_hstw_mask)) )
  20.225 +         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
  20.226      {
  20.227 -        sflags = sha->flags;
  20.228 -        sdom = sha->domid;
  20.229 +        scombo.word = *(u32 *)&sha->flags;
  20.230  
  20.231          for ( ; ; )
  20.232          {
  20.233 -            union grant_combo scombo, prev_scombo, new_scombo;
  20.234 +            /* If not already pinned, check the grant domid and type. */
  20.235 +            if ( !act->pin &&
  20.236 +                 (((scombo.shorts.flags & GTF_type_mask) !=
  20.237 +                   GTF_permit_access) ||
  20.238 +                  (scombo.shorts.domid != current->domain->domain_id)) )
  20.239 +                 PIN_FAIL(unlock_out, GNTST_general_error,
  20.240 +                          "Bad flags (%x) or dom (%d). (expected dom %d)\n",
  20.241 +                          scombo.shorts.flags, scombo.shorts.domid,
  20.242 +                          current->domain->domain_id);
  20.243  
  20.244 -            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access ||
  20.245 -                          sdom != current->domain->domain_id ) )
  20.246 -                PIN_FAIL(unlock_out, GNTST_general_error,
  20.247 -                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
  20.248 -                         sflags, sdom, current->domain->domain_id);
  20.249 -
  20.250 -            /* Merge two 16-bit values into a 32-bit combined update. */
  20.251 -            scombo.shorts.flags = sflags;
  20.252 -            scombo.shorts.domid = sdom;
  20.253 -            
  20.254              new_scombo = scombo;
  20.255              new_scombo.shorts.flags |= GTF_reading;
  20.256  
  20.257              if ( !readonly )
  20.258              {
  20.259                  new_scombo.shorts.flags |= GTF_writing;
  20.260 -                if ( unlikely(sflags & GTF_readonly) )
  20.261 +                if ( unlikely(scombo.shorts.flags & GTF_readonly) )
  20.262                      PIN_FAIL(unlock_out, GNTST_general_error,
  20.263                               "Attempt to write-pin a r/o grant entry.\n");
  20.264              }
  20.265 +
  20.266              prev_scombo.word = cmpxchg((u32 *)&sha->flags,
  20.267                                         scombo.word, new_scombo.word);
  20.268              if ( likely(prev_scombo.word == scombo.word) )
  20.269 @@ -809,19 +808,16 @@ static int
  20.270              if ( retries++ == 4 )
  20.271                  PIN_FAIL(unlock_out, GNTST_general_error,
  20.272                           "Shared grant entry is unstable.\n");
  20.273 -            sflags = prev_scombo.shorts.flags;
  20.274 -            sdom = prev_scombo.shorts.flags;
  20.275 +
  20.276 +            scombo = prev_scombo;
  20.277          }
  20.278  
  20.279          if ( !act->pin )
  20.280          {
  20.281 -            act->domid = sdom;
  20.282 +            act->domid = scombo.shorts.domid;
  20.283              act->frame = gmfn_to_mfn(rd, sha->frame);
  20.284          }
  20.285      }
  20.286 -    else if ( (act->pin & 0x80808080U) != 0 )
  20.287 -        PIN_FAIL(unlock_out, ENOSPC,
  20.288 -                 "Risk of counter overflow %08x\n", act->pin);
  20.289  
  20.290      act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  20.291  
    21.1 --- a/xen/include/asm-x86/bitops.h	Sun Sep 10 14:31:54 2006 -0600
    21.2 +++ b/xen/include/asm-x86/bitops.h	Sun Sep 10 14:52:57 2006 -0600
    21.3 @@ -7,20 +7,19 @@
    21.4  
    21.5  #include <xen/config.h>
    21.6  
    21.7 -/*
    21.8 - * These have to be done with inline assembly: that way the bit-setting
    21.9 - * is guaranteed to be atomic. All bit operations return 0 if the bit
   21.10 - * was cleared before the operation and != 0 if it was not.
   21.11 - *
   21.12 - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
   21.13 - */
   21.14 -
   21.15  #ifdef CONFIG_SMP
   21.16  #define LOCK_PREFIX "lock ; "
   21.17  #else
   21.18  #define LOCK_PREFIX ""
   21.19  #endif
   21.20  
   21.21 +/*
   21.22 + * We use the "+m" constraint because the memory operand is both read from
   21.23 + * and written to. Since the operand is in fact a word array, we also
   21.24 + * specify "memory" in the clobbers list to indicate that words other than
   21.25 + * the one directly addressed by the memory operand may be modified.
   21.26 + */
   21.27 +
   21.28  #define ADDR (*(volatile long *) addr)
   21.29  
   21.30  /**
   21.31 @@ -37,8 +36,8 @@ static __inline__ void set_bit(int nr, v
   21.32  {
   21.33  	__asm__ __volatile__( LOCK_PREFIX
   21.34  		"btsl %1,%0"
   21.35 -		:"=m" (ADDR)
   21.36 -		:"dIr" (nr));
   21.37 +		:"+m" (ADDR)
   21.38 +		:"dIr" (nr) : "memory");
   21.39  }
   21.40  
   21.41  /**
   21.42 @@ -54,8 +53,8 @@ static __inline__ void __set_bit(int nr,
   21.43  {
   21.44  	__asm__(
   21.45  		"btsl %1,%0"
   21.46 -		:"=m" (ADDR)
   21.47 -		:"dIr" (nr));
   21.48 +		:"+m" (ADDR)
   21.49 +		:"dIr" (nr) : "memory");
   21.50  }
   21.51  
   21.52  /**
   21.53 @@ -72,8 +71,8 @@ static __inline__ void clear_bit(int nr,
   21.54  {
   21.55  	__asm__ __volatile__( LOCK_PREFIX
   21.56  		"btrl %1,%0"
   21.57 -		:"=m" (ADDR)
   21.58 -		:"dIr" (nr));
   21.59 +		:"+m" (ADDR)
   21.60 +		:"dIr" (nr) : "memory");
   21.61  }
   21.62  
   21.63  /**
   21.64 @@ -89,8 +88,8 @@ static __inline__ void __clear_bit(int n
   21.65  {
   21.66  	__asm__(
   21.67  		"btrl %1,%0"
   21.68 -		:"=m" (ADDR)
   21.69 -		:"dIr" (nr));
   21.70 +		:"+m" (ADDR)
   21.71 +		:"dIr" (nr) : "memory");
   21.72  }
   21.73  
   21.74  #define smp_mb__before_clear_bit()	barrier()
   21.75 @@ -109,8 +108,8 @@ static __inline__ void __change_bit(int 
   21.76  {
   21.77  	__asm__ __volatile__(
   21.78  		"btcl %1,%0"
   21.79 -		:"=m" (ADDR)
   21.80 -		:"dIr" (nr));
   21.81 +		:"+m" (ADDR)
   21.82 +		:"dIr" (nr) : "memory");
   21.83  }
   21.84  
   21.85  /**
   21.86 @@ -126,8 +125,8 @@ static __inline__ void change_bit(int nr
   21.87  {
   21.88  	__asm__ __volatile__( LOCK_PREFIX
   21.89  		"btcl %1,%0"
   21.90 -		:"=m" (ADDR)
   21.91 -		:"dIr" (nr));
   21.92 +		:"+m" (ADDR)
   21.93 +		:"dIr" (nr) : "memory");
   21.94  }
   21.95  
   21.96  /**
   21.97 @@ -144,7 +143,7 @@ static __inline__ int test_and_set_bit(i
   21.98  
   21.99  	__asm__ __volatile__( LOCK_PREFIX
  21.100  		"btsl %2,%1\n\tsbbl %0,%0"
  21.101 -		:"=r" (oldbit),"=m" (ADDR)
  21.102 +		:"=r" (oldbit),"+m" (ADDR)
  21.103  		:"dIr" (nr) : "memory");
  21.104  	return oldbit;
  21.105  }
  21.106 @@ -164,8 +163,8 @@ static __inline__ int __test_and_set_bit
  21.107  
  21.108  	__asm__(
  21.109  		"btsl %2,%1\n\tsbbl %0,%0"
  21.110 -		:"=r" (oldbit),"=m" (ADDR)
  21.111 -		:"dIr" (nr));
  21.112 +		:"=r" (oldbit),"+m" (ADDR)
  21.113 +		:"dIr" (nr) : "memory");
  21.114  	return oldbit;
  21.115  }
  21.116  
  21.117 @@ -183,7 +182,7 @@ static __inline__ int test_and_clear_bit
  21.118  
  21.119  	__asm__ __volatile__( LOCK_PREFIX
  21.120  		"btrl %2,%1\n\tsbbl %0,%0"
  21.121 -		:"=r" (oldbit),"=m" (ADDR)
  21.122 +		:"=r" (oldbit),"+m" (ADDR)
  21.123  		:"dIr" (nr) : "memory");
  21.124  	return oldbit;
  21.125  }
  21.126 @@ -203,8 +202,8 @@ static __inline__ int __test_and_clear_b
  21.127  
  21.128  	__asm__(
  21.129  		"btrl %2,%1\n\tsbbl %0,%0"
  21.130 -		:"=r" (oldbit),"=m" (ADDR)
  21.131 -		:"dIr" (nr));
  21.132 +		:"=r" (oldbit),"+m" (ADDR)
  21.133 +		:"dIr" (nr) : "memory");
  21.134  	return oldbit;
  21.135  }
  21.136  
  21.137 @@ -215,7 +214,7 @@ static __inline__ int __test_and_change_
  21.138  
  21.139  	__asm__ __volatile__(
  21.140  		"btcl %2,%1\n\tsbbl %0,%0"
  21.141 -		:"=r" (oldbit),"=m" (ADDR)
  21.142 +		:"=r" (oldbit),"+m" (ADDR)
  21.143  		:"dIr" (nr) : "memory");
  21.144  	return oldbit;
  21.145  }
  21.146 @@ -234,7 +233,7 @@ static __inline__ int test_and_change_bi
  21.147  
  21.148  	__asm__ __volatile__( LOCK_PREFIX
  21.149  		"btcl %2,%1\n\tsbbl %0,%0"
  21.150 -		:"=r" (oldbit),"=m" (ADDR)
  21.151 +		:"=r" (oldbit),"+m" (ADDR)
  21.152  		:"dIr" (nr) : "memory");
  21.153  	return oldbit;
  21.154  }
  21.155 @@ -242,7 +241,7 @@ static __inline__ int test_and_change_bi
  21.156  
  21.157  static __inline__ int constant_test_bit(int nr, const volatile void * addr)
  21.158  {
  21.159 -	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  21.160 +	return ((1U << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  21.161  }
  21.162  
  21.163  static __inline__ int variable_test_bit(int nr, volatile void * addr)