ia64/xen-unstable

changeset 14828:41471cc650a2

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Thu Apr 12 10:30:12 2007 -0600 (2007-04-12)
parents 605d534f9635 a839e331f06f
children c42ae7839750 fdbbc6aa2cbf
files tools/libxc/xc_hvm_save.c tools/libxc/xc_linux_save.c
line diff
     1.1 --- a/docs/xen-api/xenapi-datamodel.tex	Thu Apr 12 10:26:42 2007 -0600
     1.2 +++ b/docs/xen-api/xenapi-datamodel.tex	Thu Apr 12 10:30:12 2007 -0600
     1.3 @@ -1558,6 +1558,111 @@ void
     1.4  \vspace{0.3cm}
     1.5  \vspace{0.3cm}
     1.6  \vspace{0.3cm}
     1.7 +\subsubsection{RPC name:~add\_to\_VCPUs\_params\_live}
     1.8 +
     1.9 +{\bf Overview:} 
    1.10 +Add the given key-value pair to VM.VCPUs\_params, and apply that value on
    1.11 +the running VM.
    1.12 +
    1.13 + \noindent {\bf Signature:} 
    1.14 +\begin{verbatim} void add_to_VCPUs_params_live (session_id s, VM ref self, string key, string value)\end{verbatim}
    1.15 +
    1.16 +
    1.17 +\noindent{\bf Arguments:}
    1.18 +
    1.19 + 
    1.20 +\vspace{0.3cm}
    1.21 +\begin{tabular}{|c|c|p{7cm}|}
    1.22 + \hline
    1.23 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    1.24 +{\tt VM ref } & self & The VM \\ \hline 
    1.25 +
    1.26 +{\tt string } & key & The key \\ \hline 
    1.27 +
    1.28 +{\tt string } & value & The value \\ \hline 
    1.29 +
    1.30 +\end{tabular}
    1.31 +
    1.32 +\vspace{0.3cm}
    1.33 +
    1.34 + \noindent {\bf Return Type:} 
    1.35 +{\tt 
    1.36 +void
    1.37 +}
    1.38 +
    1.39 +
    1.40 +
    1.41 +\vspace{0.3cm}
    1.42 +\vspace{0.3cm}
    1.43 +\vspace{0.3cm}
    1.44 +\subsubsection{RPC name:~set\_memory\_dynamic\_max\_live}
    1.45 +
    1.46 +{\bf Overview:} 
    1.47 +Set memory\_dynamic\_max in database and on running VM.
    1.48 +
    1.49 + \noindent {\bf Signature:} 
    1.50 +\begin{verbatim} void set_memory_dynamic_max_live (session_id s, VM ref self, int max)\end{verbatim}
    1.51 +
    1.52 +
    1.53 +\noindent{\bf Arguments:}
    1.54 +
    1.55 + 
    1.56 +\vspace{0.3cm}
    1.57 +\begin{tabular}{|c|c|p{7cm}|}
    1.58 + \hline
    1.59 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    1.60 +{\tt VM ref } & self & The VM \\ \hline 
    1.61 +
    1.62 +{\tt int } & max & The memory\_dynamic\_max value \\ \hline 
    1.63 +
    1.64 +\end{tabular}
    1.65 +
    1.66 +\vspace{0.3cm}
    1.67 +
    1.68 + \noindent {\bf Return Type:} 
    1.69 +{\tt 
    1.70 +void
    1.71 +}
    1.72 +
    1.73 +
    1.74 +
    1.75 +\vspace{0.3cm}
    1.76 +\vspace{0.3cm}
    1.77 +\vspace{0.3cm}
    1.78 +\subsubsection{RPC name:~set\_memory\_dynamic\_min\_live}
    1.79 +
    1.80 +{\bf Overview:} 
    1.81 +Set memory\_dynamic\_min in database and on running VM.
    1.82 +
    1.83 + \noindent {\bf Signature:} 
    1.84 +\begin{verbatim} void set_memory_dynamic_min_live (session_id s, VM ref self, int min)\end{verbatim}
    1.85 +
    1.86 +
    1.87 +\noindent{\bf Arguments:}
    1.88 +
    1.89 + 
    1.90 +\vspace{0.3cm}
    1.91 +\begin{tabular}{|c|c|p{7cm}|}
    1.92 + \hline
    1.93 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    1.94 +{\tt VM ref } & self & The VM \\ \hline 
    1.95 +
    1.96 +{\tt int } & min & The memory\_dynamic\_min value \\ \hline 
    1.97 +
    1.98 +\end{tabular}
    1.99 +
   1.100 +\vspace{0.3cm}
   1.101 +
   1.102 + \noindent {\bf Return Type:} 
   1.103 +{\tt 
   1.104 +void
   1.105 +}
   1.106 +
   1.107 +
   1.108 +
   1.109 +\vspace{0.3cm}
   1.110 +\vspace{0.3cm}
   1.111 +\vspace{0.3cm}
   1.112  \subsubsection{RPC name:~send\_sysrq}
   1.113  
   1.114  {\bf Overview:} 
   1.115 @@ -4184,6 +4289,7 @@ Quals & Field & Type & Description \\
   1.116  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/utilisation} & (int $\rightarrow$ float) Map & Utilisation for all of guest's current VCPUs \\
   1.117  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/CPU} & (int $\rightarrow$ int) Map & VCPU to PCPU map \\
   1.118  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/params} & (string $\rightarrow$ string) Map & The live equivalent to VM.VCPUs\_params \\
   1.119 +$\mathit{RO}_\mathit{run}$ &  {\tt state} & string Set & The state of the guest, e.g., blocked, dying, etc. \\
   1.120  $\mathit{RO}_\mathit{run}$ &  {\tt start\_time} & datetime & Time at which this VM was last booted \\
   1.121  $\mathit{RO}_\mathit{run}$ &  {\tt last\_updated} & datetime & Time at which this information was last updated \\
   1.122  \hline
   1.123 @@ -4402,6 +4508,38 @@ value of the field
   1.124  \vspace{0.3cm}
   1.125  \vspace{0.3cm}
   1.126  \vspace{0.3cm}
   1.127 +\subsubsection{RPC name:~get\_state}
   1.128 +
   1.129 +{\bf Overview:} 
   1.130 +Get the state field of the given VM\_metrics.
   1.131 +
   1.132 + \noindent {\bf Signature:} 
   1.133 +\begin{verbatim} (string Set) get_state (session_id s, VM_metrics ref self)\end{verbatim}
   1.134 +
   1.135 +
   1.136 +\noindent{\bf Arguments:}
   1.137 +
   1.138 + 
   1.139 +\vspace{0.3cm}
   1.140 +\begin{tabular}{|c|c|p{7cm}|}
   1.141 + \hline
   1.142 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   1.143 +{\tt VM\_metrics ref } & self & reference to the object \\ \hline 
   1.144 +
   1.145 +\end{tabular}
   1.146 +
   1.147 +\vspace{0.3cm}
   1.148 +
   1.149 + \noindent {\bf Return Type:} 
   1.150 +{\tt 
   1.151 +string Set
   1.152 +}
   1.153 +
   1.154 +
   1.155 +value of the field
   1.156 +\vspace{0.3cm}
   1.157 +\vspace{0.3cm}
   1.158 +\vspace{0.3cm}
   1.159  \subsubsection{RPC name:~get\_start\_time}
   1.160  
   1.161  {\bf Overview:} 
   1.162 @@ -6601,7 +6739,8 @@ Quals & Field & Type & Description \\
   1.163  $\mathit{RO}_\mathit{run}$ &  {\tt speed} & int & the speed of the physical CPU \\
   1.164  $\mathit{RO}_\mathit{run}$ &  {\tt modelname} & string & the model name of the physical CPU \\
   1.165  $\mathit{RO}_\mathit{run}$ &  {\tt stepping} & string & the stepping of the physical CPU \\
   1.166 -$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU \\
   1.167 +$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU (a decoded version of the features field) \\
   1.168 +$\mathit{RO}_\mathit{run}$ &  {\tt features} & string & the physical CPU feature bitmap \\
   1.169  $\mathit{RO}_\mathit{run}$ &  {\tt utilisation} & float & the current CPU utilisation \\
   1.170  \hline
   1.171  \end{longtable}
   1.172 @@ -6883,6 +7022,38 @@ value of the field
   1.173  \vspace{0.3cm}
   1.174  \vspace{0.3cm}
   1.175  \vspace{0.3cm}
   1.176 +\subsubsection{RPC name:~get\_features}
   1.177 +
   1.178 +{\bf Overview:} 
   1.179 +Get the features field of the given host\_cpu.
   1.180 +
   1.181 + \noindent {\bf Signature:} 
   1.182 +\begin{verbatim} string get_features (session_id s, host_cpu ref self)\end{verbatim}
   1.183 +
   1.184 +
   1.185 +\noindent{\bf Arguments:}
   1.186 +
   1.187 + 
   1.188 +\vspace{0.3cm}
   1.189 +\begin{tabular}{|c|c|p{7cm}|}
   1.190 + \hline
   1.191 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   1.192 +{\tt host\_cpu ref } & self & reference to the object \\ \hline 
   1.193 +
   1.194 +\end{tabular}
   1.195 +
   1.196 +\vspace{0.3cm}
   1.197 +
   1.198 + \noindent {\bf Return Type:} 
   1.199 +{\tt 
   1.200 +string
   1.201 +}
   1.202 +
   1.203 +
   1.204 +value of the field
   1.205 +\vspace{0.3cm}
   1.206 +\vspace{0.3cm}
   1.207 +\vspace{0.3cm}
   1.208  \subsubsection{RPC name:~get\_utilisation}
   1.209  
   1.210  {\bf Overview:} 
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h	Thu Apr 12 10:26:42 2007 -0600
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h	Thu Apr 12 10:30:12 2007 -0600
     2.3 @@ -56,20 +56,6 @@
     2.4  #include <asm/atomic.h>
     2.5  #include <asm/uaccess.h>
     2.6  
     2.7 -#if 1
     2.8 -#define IPRINTK(fmt, args...)				\
     2.9 -	printk(KERN_INFO "xen_blk: " fmt, ##args)
    2.10 -#else
    2.11 -#define IPRINTK(fmt, args...) ((void)0)
    2.12 -#endif
    2.13 -
    2.14 -#if 1
    2.15 -#define WPRINTK(fmt, args...)				\
    2.16 -	printk(KERN_WARNING "xen_blk: " fmt, ##args)
    2.17 -#else
    2.18 -#define WPRINTK(fmt, args...) ((void)0)
    2.19 -#endif
    2.20 -
    2.21  #define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
    2.22  
    2.23  #if 0
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Thu Apr 12 10:26:42 2007 -0600
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Thu Apr 12 10:30:12 2007 -0600
     3.3 @@ -128,14 +128,12 @@ xlbd_alloc_major_info(int major, int min
     3.4  		break;
     3.5  	}
     3.6  
     3.7 -	printk("Registering block device major %i\n", ptr->major);
     3.8  	if (register_blkdev(ptr->major, ptr->type->devname)) {
     3.9 -		WPRINTK("can't get major %d with name %s\n",
    3.10 -			ptr->major, ptr->type->devname);
    3.11  		kfree(ptr);
    3.12  		return NULL;
    3.13  	}
    3.14  
    3.15 +	printk("xen-vbd: registered block device major %i\n", ptr->major);
    3.16  	major_info[index] = ptr;
    3.17  	return ptr;
    3.18  }
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Apr 12 10:26:42 2007 -0600
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Apr 12 10:30:12 2007 -0600
     4.3 @@ -60,9 +60,6 @@ static grant_ref_t gnttab_free_head;
     4.4  static DEFINE_SPINLOCK(gnttab_list_lock);
     4.5  
     4.6  static struct grant_entry *shared;
     4.7 -#ifndef CONFIG_XEN
     4.8 -static unsigned long resume_frames;
     4.9 -#endif
    4.10  
    4.11  static struct gnttab_free_callback *gnttab_free_callback_list;
    4.12  
    4.13 @@ -514,6 +511,8 @@ int gnttab_suspend(void)
    4.14  
    4.15  #include <platform-pci.h>
    4.16  
    4.17 +static unsigned long resume_frames;
    4.18 +
    4.19  static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
    4.20  {
    4.21  	struct xen_add_to_physmap xatp;
    4.22 @@ -543,23 +542,17 @@ int gnttab_resume(void)
    4.23  	if (max_nr_gframes < nr_gframes)
    4.24  		return -ENOSYS;
    4.25  
    4.26 -	resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
    4.27 +	if (!resume_frames) {
    4.28 +		resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
    4.29 +		shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
    4.30 +		if (shared == NULL) {
    4.31 +			printk("error to ioremap gnttab share frames\n");
    4.32 +			return -1;
    4.33 +		}
    4.34 +	}
    4.35  
    4.36  	gnttab_map(0, nr_gframes - 1);
    4.37  
    4.38 -	shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
    4.39 -	if (shared == NULL) {
    4.40 -		printk("error to ioremap gnttab share frames\n");
    4.41 -		return -1;
    4.42 -	}
    4.43 -
    4.44 -	return 0;
    4.45 -}
    4.46 -
    4.47 -int gnttab_suspend(void)
    4.48 -{
    4.49 -	iounmap(shared);
    4.50 -	resume_frames = 0;
    4.51  	return 0;
    4.52  }
    4.53  
    4.54 @@ -624,7 +617,6 @@ int __devinit gnttab_init(void)
    4.55  	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
    4.56  	gnttab_free_head  = NR_RESERVED_ENTRIES;
    4.57  
    4.58 -	printk("Grant table initialized\n");
    4.59  	return 0;
    4.60  
    4.61   ini_nomem:
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Thu Apr 12 10:26:42 2007 -0600
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Thu Apr 12 10:30:12 2007 -0600
     5.3 @@ -209,6 +209,8 @@ int __xen_suspend(int fast_suspend)
     5.4  	if (fast_suspend) {
     5.5  		xenbus_suspend();
     5.6  		err = stop_machine_run(take_machine_down, &fast_suspend, 0);
     5.7 +		if (err < 0)
     5.8 +			xenbus_suspend_cancel();
     5.9  	} else {
    5.10  		err = take_machine_down(&fast_suspend);
    5.11  	}
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Apr 12 10:26:42 2007 -0600
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Apr 12 10:30:12 2007 -0600
     6.3 @@ -118,6 +118,7 @@ static void shutdown_handler(struct xenb
     6.4  	err = xenbus_transaction_start(&xbt);
     6.5  	if (err)
     6.6  		return;
     6.7 +
     6.8  	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
     6.9  	/* Ignore read errors and empty reads. */
    6.10  	if (XENBUS_IS_ERR_READ(str)) {
    6.11 @@ -206,14 +207,12 @@ static int setup_shutdown_watcher(void)
    6.12  		printk(KERN_ERR "Failed to set shutdown watcher\n");
    6.13  		return err;
    6.14  	}
    6.15 -	xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
    6.16  
    6.17  	err = register_xenbus_watch(&sysrq_watch);
    6.18  	if (err) {
    6.19  		printk(KERN_ERR "Failed to set sysrq watcher\n");
    6.20  		return err;
    6.21  	}
    6.22 -	xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
    6.23  
    6.24  	return 0;
    6.25  }
     7.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu Apr 12 10:26:42 2007 -0600
     7.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu Apr 12 10:30:12 2007 -0600
     7.3 @@ -210,7 +210,7 @@ extern unsigned long pg0[];
     7.4  
     7.5  /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
     7.6  #define pmd_none(x)	(!(unsigned long)pmd_val(x))
     7.7 -#ifdef CONFIG_XEN_COMPAT_030002
     7.8 +#if CONFIG_XEN_COMPAT <= 0x030002
     7.9  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    7.10     can temporarily clear it. */
    7.11  #define pmd_present(x)	(pmd_val(x))
     8.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Apr 12 10:26:42 2007 -0600
     8.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Apr 12 10:30:12 2007 -0600
     8.3 @@ -411,7 +411,7 @@ static inline int pmd_large(pmd_t pte) {
     8.4  #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
     8.5                                    pmd_index(address))
     8.6  #define pmd_none(x)	(!pmd_val(x))
     8.7 -#ifdef CONFIG_XEN_COMPAT_030002
     8.8 +#if CONFIG_XEN_COMPAT <= 0x030002
     8.9  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    8.10     can temporarily clear it. */
    8.11  #define pmd_present(x)	(pmd_val(x))
     9.1 --- a/tools/blktap/drivers/block-qcow.c	Thu Apr 12 10:26:42 2007 -0600
     9.2 +++ b/tools/blktap/drivers/block-qcow.c	Thu Apr 12 10:30:12 2007 -0600
     9.3 @@ -949,8 +949,14 @@ int tdqcow_open (struct disk_driver *dd,
     9.4  		goto fail;
     9.5  	}
     9.6  	init_fds(dd);
     9.7 -	s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : 
     9.8 -				(final_cluster + s->cluster_size));
     9.9 +
    9.10 +	if (!final_cluster)
    9.11 +		s->fd_end = s->l1_table_offset + l1_table_size;
    9.12 +	else {
    9.13 +		s->fd_end = lseek64(fd, 0, SEEK_END);
    9.14 +		if (s->fd_end == (off64_t)-1)
    9.15 +			goto fail;
    9.16 +	}
    9.17  
    9.18  	return 0;
    9.19  	
    10.1 --- a/tools/examples/network-bridge	Thu Apr 12 10:26:42 2007 -0600
    10.2 +++ b/tools/examples/network-bridge	Thu Apr 12 10:30:12 2007 -0600
    10.3 @@ -183,12 +183,12 @@ op_start () {
    10.4  	return
    10.5      fi
    10.6  
    10.7 -    if ! link_exists "$vdev"; then
    10.8 -        if link_exists "$pdev"; then
    10.9 -            # The device is already up.
   10.10 -            return
   10.11 -        else
   10.12 -            echo "
   10.13 +    if link_exists "$pdev"; then
   10.14 +	# The device is already up.
   10.15 +	return
   10.16 +    fi
   10.17 +    if link_exists veth0 && ! link_exists "$vdev"; then
   10.18 +	echo "
   10.19  Link $vdev is missing.
   10.20  This may be because you have reached the limit of the number of interfaces
   10.21  that the loopback driver supports.  If the loopback driver is a module, you
   10.22 @@ -196,8 +196,7 @@ may raise this limit by passing it as a 
   10.23  driver is compiled statically into the kernel, then you may set the parameter
   10.24  using netloop.nloopbacks=<N> on the domain 0 kernel command line.
   10.25  " >&2
   10.26 -            exit 1
   10.27 -        fi
   10.28 +	exit 1
   10.29      fi
   10.30  
   10.31      create_bridge ${bridge}
   10.32 @@ -224,9 +223,13 @@ using netloop.nloopbacks=<N> on the doma
   10.33  	add_to_bridge2 ${bridge} ${pdev}
   10.34  	do_ifup ${netdev}
   10.35      else
   10.36 +	ip link set ${bridge} arp on
   10.37 +	ip link set ${bridge} multicast on
   10.38  	# old style without ${vdev}
   10.39  	transfer_addrs  ${netdev} ${bridge}
   10.40  	transfer_routes ${netdev} ${bridge}
   10.41 +	# Attach the real interface to the bridge.
   10.42 +	add_to_bridge ${bridge} ${netdev}
   10.43      fi
   10.44  
   10.45      if [ ${antispoof} = 'yes' ] ; then
    11.1 --- a/tools/examples/xmexample.hvm	Thu Apr 12 10:26:42 2007 -0600
    11.2 +++ b/tools/examples/xmexample.hvm	Thu Apr 12 10:30:12 2007 -0600
    11.3 @@ -170,6 +170,12 @@ serial='pty'
    11.4  
    11.5  
    11.6  #-----------------------------------------------------------------------------
    11.7 +#   Qemu Monitor, disabled by default
    11.8 +#   Use ctrl-alt-2 to connect
    11.9 +#monitor=1
   11.10 +
   11.11 +
   11.12 +#-----------------------------------------------------------------------------
   11.13  #   enable sound card support, [sb16|es1370|all|..,..], default none
   11.14  #soundhw='sb16'
   11.15  
    12.1 --- a/tools/examples/xmexample.vti	Thu Apr 12 10:26:42 2007 -0600
    12.2 +++ b/tools/examples/xmexample.vti	Thu Apr 12 10:30:12 2007 -0600
    12.3 @@ -113,6 +113,11 @@ stdvga=0
    12.4  serial='pty'
    12.5  
    12.6  #-----------------------------------------------------------------------------
    12.7 +#   Qemu Monitor, disabled by default
    12.8 +#   Use ctrl-alt-2 to connect
    12.9 +#monitor=1
   12.10 +
   12.11 +#-----------------------------------------------------------------------------
   12.12  #   enable sound card support, [sb16|es1370|all|..,..], default none
   12.13  #soundhw='sb16'
   12.14  
    13.1 --- a/tools/ioemu/hw/pc.c	Thu Apr 12 10:26:42 2007 -0600
    13.2 +++ b/tools/ioemu/hw/pc.c	Thu Apr 12 10:30:12 2007 -0600
    13.3 @@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size, 
    13.4      if (pci_enabled && acpi_enabled) {
    13.5          piix4_pm_init(pci_bus, piix3_devfn + 3);
    13.6      }
    13.7 -#endif /* !CONFIG_DM */
    13.8  
    13.9  #if 0
   13.10      /* ??? Need to figure out some way for the user to
   13.11 @@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size, 
   13.12          lsi_scsi_attach(scsi, bdrv, -1);
   13.13      }
   13.14  #endif
   13.15 +#else
   13.16 +    if (pci_enabled) {
   13.17 +        void *scsi;
   13.18 +
   13.19 +        scsi = lsi_scsi_init(pci_bus, -1);
   13.20 +        for (i = 0; i < MAX_SCSI_DISKS ; i++) {
   13.21 +            if (bs_table[i + MAX_DISKS]) 
   13.22 +                lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1);
   13.23 +        }
   13.24 +    }
   13.25 +#endif /* !CONFIG_DM */
   13.26      /* must be done after all PCI devices are instanciated */
   13.27      /* XXX: should be done in the Bochs BIOS */
   13.28      if (pci_enabled) {
    14.1 --- a/tools/ioemu/monitor.c	Thu Apr 12 10:26:42 2007 -0600
    14.2 +++ b/tools/ioemu/monitor.c	Thu Apr 12 10:30:12 2007 -0600
    14.3 @@ -180,7 +180,7 @@ static void do_commit(void)
    14.4  {
    14.5      int i;
    14.6  
    14.7 -    for (i = 0; i < MAX_DISKS; i++) {
    14.8 +    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
    14.9          if (bs_table[i]) {
   14.10              bdrv_commit(bs_table[i]);
   14.11          }
    15.1 --- a/tools/ioemu/vl.c	Thu Apr 12 10:26:42 2007 -0600
    15.2 +++ b/tools/ioemu/vl.c	Thu Apr 12 10:30:12 2007 -0600
    15.3 @@ -116,7 +116,7 @@ char phys_ram_file[1024];
    15.4  void *ioport_opaque[MAX_IOPORTS];
    15.5  IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
    15.6  IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
    15.7 -BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD];
    15.8 +BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS], *fd_table[MAX_FD];
    15.9  int vga_ram_size;
   15.10  int bios_size;
   15.11  static DisplayState display_state;
   15.12 @@ -1396,7 +1396,7 @@ static void stdio_received_byte(int ch)
   15.13          case 's': 
   15.14              {
   15.15                  int i;
   15.16 -                for (i = 0; i < MAX_DISKS; i++) {
   15.17 +                for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   15.18                      if (bs_table[i])
   15.19                          bdrv_commit(bs_table[i]);
   15.20                  }
   15.21 @@ -6057,7 +6057,7 @@ int main(int argc, char **argv)
   15.22      int snapshot, linux_boot;
   15.23      const char *initrd_filename;
   15.24  #ifndef CONFIG_DM
   15.25 -    const char *hd_filename[MAX_DISKS];
   15.26 +    const char *hd_filename[MAX_DISKS + MAX_SCSI_DISKS];
   15.27  #endif /* !CONFIG_DM */
   15.28      const char *fd_filename[MAX_FD];
   15.29      const char *kernel_filename, *kernel_cmdline;
   15.30 @@ -6126,7 +6126,7 @@ int main(int argc, char **argv)
   15.31      for(i = 0; i < MAX_FD; i++)
   15.32          fd_filename[i] = NULL;
   15.33  #ifndef CONFIG_DM
   15.34 -    for(i = 0; i < MAX_DISKS; i++)
   15.35 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
   15.36          hd_filename[i] = NULL;
   15.37  #endif /* !CONFIG_DM */
   15.38      ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
   15.39 @@ -6724,7 +6724,7 @@ int main(int argc, char **argv)
   15.40      }
   15.41  
   15.42      /* open the virtual block devices */
   15.43 -    for(i = 0; i < MAX_DISKS; i++) {
   15.44 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   15.45          if (hd_filename[i]) {
   15.46              if (!bs_table[i]) {
   15.47                  char buf[64];
    16.1 --- a/tools/ioemu/vl.h	Thu Apr 12 10:26:42 2007 -0600
    16.2 +++ b/tools/ioemu/vl.h	Thu Apr 12 10:30:12 2007 -0600
    16.3 @@ -818,8 +818,9 @@ int vnc_start_viewer(int port);
    16.4  
    16.5  /* ide.c */
    16.6  #define MAX_DISKS 4
    16.7 +#define MAX_SCSI_DISKS 7
    16.8  
    16.9 -extern BlockDriverState *bs_table[MAX_DISKS];
   16.10 +extern BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS];
   16.11  
   16.12  void isa_ide_init(int iobase, int iobase2, int irq,
   16.13                    BlockDriverState *hd0, BlockDriverState *hd1);
    17.1 --- a/tools/ioemu/xenstore.c	Thu Apr 12 10:26:42 2007 -0600
    17.2 +++ b/tools/ioemu/xenstore.c	Thu Apr 12 10:30:12 2007 -0600
    17.3 @@ -18,7 +18,7 @@
    17.4  #include <fcntl.h>
    17.5  
    17.6  static struct xs_handle *xsh = NULL;
    17.7 -static char *media_filename[MAX_DISKS];
    17.8 +static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS];
    17.9  static QEMUTimer *insert_timer = NULL;
   17.10  
   17.11  #define UWAIT_MAX (30*1000000) /* thirty seconds */
   17.12 @@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c
   17.13      int ret = 0;
   17.14  
   17.15      if (*buf)
   17.16 -	free(*buf);
   17.17 +        free(*buf);
   17.18      va_start(ap, fmt);
   17.19      if (vasprintf(buf, fmt, ap) == -1) {
   17.20 -	buf = NULL;
   17.21 -	ret = -1;
   17.22 +        buf = NULL;
   17.23 +        ret = -1;
   17.24      }
   17.25      va_end(ap);
   17.26      return ret;
   17.27 @@ -44,12 +44,12 @@ static void insert_media(void *opaque)
   17.28  {
   17.29      int i;
   17.30  
   17.31 -    for (i = 0; i < MAX_DISKS; i++) {
   17.32 -	if (media_filename[i] && bs_table[i]) {
   17.33 -	    do_change(bs_table[i]->device_name, media_filename[i]);
   17.34 -	    free(media_filename[i]);
   17.35 -	    media_filename[i] = NULL;
   17.36 -	}
   17.37 +    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   17.38 +        if (media_filename[i] && bs_table[i]) {
   17.39 +            do_change(bs_table[i]->device_name, media_filename[i]);
   17.40 +            free(media_filename[i]);
   17.41 +            media_filename[i] = NULL;
   17.42 +        }
   17.43      }
   17.44  }
   17.45  
   17.46 @@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in
   17.47  {
   17.48  
   17.49      if (insert_timer == NULL)
   17.50 -	insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
   17.51 +        insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
   17.52      qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout);
   17.53  }
   17.54  
   17.55 @@ -82,17 +82,17 @@ void xenstore_parse_domain_config(int do
   17.56      char **e = NULL;
   17.57      char *buf = NULL, *path;
   17.58      char *fpath = NULL, *bpath = NULL,
   17.59 -         *dev = NULL, *params = NULL, *type = NULL;
   17.60 -    int i;
   17.61 +        *dev = NULL, *params = NULL, *type = NULL;
   17.62 +    int i, is_scsi;
   17.63      unsigned int len, num, hd_index;
   17.64  
   17.65 -    for(i = 0; i < MAX_DISKS; i++)
   17.66 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
   17.67          media_filename[i] = NULL;
   17.68  
   17.69      xsh = xs_daemon_open();
   17.70      if (xsh == NULL) {
   17.71 -	fprintf(logfile, "Could not contact xenstore for domain config\n");
   17.72 -	return;
   17.73 +        fprintf(logfile, "Could not contact xenstore for domain config\n");
   17.74 +        return;
   17.75      }
   17.76  
   17.77      path = xs_get_domain_path(xsh, domid);
   17.78 @@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do
   17.79      }
   17.80  
   17.81      if (pasprintf(&buf, "%s/device/vbd", path) == -1)
   17.82 -	goto out;
   17.83 +        goto out;
   17.84  
   17.85      e = xs_directory(xsh, XBT_NULL, buf, &num);
   17.86      if (e == NULL)
   17.87 -	goto out;
   17.88 +        goto out;
   17.89  
   17.90      for (i = 0; i < num; i++) {
   17.91 -	/* read the backend path */
   17.92 -	if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
   17.93 -	    continue;
   17.94 -	free(bpath);
   17.95 +        /* read the backend path */
   17.96 +        if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
   17.97 +            continue;
   17.98 +        free(bpath);
   17.99          bpath = xs_read(xsh, XBT_NULL, buf, &len);
  17.100 -	if (bpath == NULL)
  17.101 -	    continue;
  17.102 -	/* read the name of the device */
  17.103 -	if (pasprintf(&buf, "%s/dev", bpath) == -1)
  17.104 -	    continue;
  17.105 -	free(dev);
  17.106 -	dev = xs_read(xsh, XBT_NULL, buf, &len);
  17.107 -	if (dev == NULL)
  17.108 -	    continue;
  17.109 -	if (strncmp(dev, "hd", 2) || strlen(dev) != 3)
  17.110 -	    continue;
  17.111 -	hd_index = dev[2] - 'a';
  17.112 -	if (hd_index >= MAX_DISKS)
  17.113 -	    continue;
  17.114 -	/* read the type of the device */
  17.115 -	if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
  17.116 -	    continue;
  17.117 -	free(type);
  17.118 -	type = xs_read(xsh, XBT_NULL, buf, &len);
  17.119 -	if (pasprintf(&buf, "%s/params", bpath) == -1)
  17.120 -	    continue;
  17.121 -	free(params);
  17.122 -	params = xs_read(xsh, XBT_NULL, buf, &len);
  17.123 -	if (params == NULL)
  17.124 -	    continue;
  17.125 +        if (bpath == NULL)
  17.126 +            continue;
  17.127 +        /* read the name of the device */
  17.128 +        if (pasprintf(&buf, "%s/dev", bpath) == -1)
  17.129 +            continue;
  17.130 +        free(dev);
  17.131 +        dev = xs_read(xsh, XBT_NULL, buf, &len);
  17.132 +        if (dev == NULL)
  17.133 +            continue;
  17.134 +        is_scsi = !strncmp(dev, "sd", 2);
  17.135 +        if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 )
  17.136 +            continue;
  17.137 +        hd_index = dev[2] - 'a';
  17.138 +        if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS))
  17.139 +            continue;
  17.140 +        /* read the type of the device */
  17.141 +        if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
  17.142 +            continue;
  17.143 +        free(type);
  17.144 +        type = xs_read(xsh, XBT_NULL, buf, &len);
  17.145 +        if (pasprintf(&buf, "%s/params", bpath) == -1)
  17.146 +            continue;
  17.147 +        free(params);
  17.148 +        params = xs_read(xsh, XBT_NULL, buf, &len);
  17.149 +        if (params == NULL)
  17.150 +            continue;
  17.151          /* 
  17.152           * check if device has a phantom vbd; the phantom is hooked
  17.153           * to the frontend device (for ease of cleanup), so lookup 
  17.154           * the frontend device, and see if there is a phantom_vbd
  17.155           * if there is, we will use resolution as the filename
  17.156           */
  17.157 -	if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
  17.158 -	    continue;
  17.159 -	free(fpath);
  17.160 +        if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
  17.161 +            continue;
  17.162 +        free(fpath);
  17.163          fpath = xs_read(xsh, XBT_NULL, buf, &len);
  17.164 -	if (fpath) {
  17.165 -	    if (pasprintf(&buf, "%s/dev", fpath) == -1)
  17.166 -	        continue;
  17.167 -	    free(params);
  17.168 +        if (fpath) {
  17.169 +            if (pasprintf(&buf, "%s/dev", fpath) == -1)
  17.170 +                continue;
  17.171 +            free(params);
  17.172              params = xs_read(xsh, XBT_NULL, buf , &len);
  17.173 -	    if (params) {
  17.174 +            if (params) {
  17.175                  /* 
  17.176                   * wait for device, on timeout silently fail because we will 
  17.177                   * fail to open below
  17.178 @@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do
  17.179              }
  17.180          }
  17.181  
  17.182 -	bs_table[hd_index] = bdrv_new(dev);
  17.183 -	/* check if it is a cdrom */
  17.184 -	if (type && !strcmp(type, "cdrom")) {
  17.185 -	    bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
  17.186 -	    if (pasprintf(&buf, "%s/params", bpath) != -1)
  17.187 -		xs_watch(xsh, buf, dev);
  17.188 -	}
  17.189 -	/* open device now if media present */
  17.190 -	if (params[0]) {
  17.191 -            if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0)
  17.192 +        bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev);
  17.193 +        /* check if it is a cdrom */
  17.194 +        if (type && !strcmp(type, "cdrom")) {
  17.195 +            bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
  17.196 +            if (pasprintf(&buf, "%s/params", bpath) != -1)
  17.197 +                xs_watch(xsh, buf, dev);
  17.198 +        }
  17.199 +        /* open device now if media present */
  17.200 +        if (params[0]) {
  17.201 +            if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)],
  17.202 +                          params, 0 /* snapshot */) < 0)
  17.203                  fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
  17.204                          params);
  17.205 -	}
  17.206 +        }
  17.207      }
  17.208  
  17.209      /* Set a watch for log-dirty requests from the migration tools */
  17.210 @@ -199,7 +201,7 @@ void xenstore_parse_domain_config(int do
  17.211  int xenstore_fd(void)
  17.212  {
  17.213      if (xsh)
  17.214 -	return xs_fileno(xsh);
  17.215 +        return xs_fileno(xsh);
  17.216      return -1;
  17.217  }
  17.218  
  17.219 @@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque
  17.220  
  17.221      vec = xs_read_watch(xsh, &num);
  17.222      if (!vec)
  17.223 -	return;
  17.224 +        return;
  17.225  
  17.226      if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) {
  17.227          xenstore_process_logdirty_event();
  17.228 @@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque
  17.229      }
  17.230  
  17.231      if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
  17.232 -	strlen(vec[XS_WATCH_TOKEN]) != 3)
  17.233 -	goto out;
  17.234 +        strlen(vec[XS_WATCH_TOKEN]) != 3)
  17.235 +        goto out;
  17.236      hd_index = vec[XS_WATCH_TOKEN][2] - 'a';
  17.237      image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
  17.238      if (image == NULL || !strcmp(image, bs_table[hd_index]->filename))
  17.239 -	goto out;		/* gone or identical */
  17.240 +        goto out;  /* gone or identical */
  17.241  
  17.242      do_eject(0, vec[XS_WATCH_TOKEN]);
  17.243      bs_table[hd_index]->filename[0] = 0;
  17.244      if (media_filename[hd_index]) {
  17.245 -	free(media_filename[hd_index]);
  17.246 -	media_filename[hd_index] = NULL;
  17.247 +        free(media_filename[hd_index]);
  17.248 +        media_filename[hd_index] = NULL;
  17.249      }
  17.250  
  17.251      if (image[0]) {
  17.252 -	media_filename[hd_index] = strdup(image);
  17.253 -	xenstore_check_new_media_present(5000);
  17.254 +        media_filename[hd_index] = strdup(image);
  17.255 +        xenstore_check_new_media_present(5000);
  17.256      }
  17.257  
  17.258   out:
  17.259 @@ -354,7 +356,7 @@ void xenstore_write_vncport(int display)
  17.260      char *portstr = NULL;
  17.261  
  17.262      if (xsh == NULL)
  17.263 -	return;
  17.264 +        return;
  17.265  
  17.266      path = xs_get_domain_path(xsh, domid);
  17.267      if (path == NULL) {
  17.268 @@ -363,10 +365,10 @@ void xenstore_write_vncport(int display)
  17.269      }
  17.270  
  17.271      if (pasprintf(&buf, "%s/console/vnc-port", path) == -1)
  17.272 -	goto out;
  17.273 +        goto out;
  17.274  
  17.275      if (pasprintf(&portstr, "%d", 5900 + display) == -1)
  17.276 -	goto out;
  17.277 +        goto out;
  17.278  
  17.279      if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0)
  17.280          fprintf(logfile, "xs_write() vncport failed\n");
  17.281 @@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid)
  17.282      unsigned int i, len, rc = 0;
  17.283  
  17.284      if (xsh == NULL) {
  17.285 -	return -1;
  17.286 +        return -1;
  17.287      }
  17.288  
  17.289      path = xs_get_domain_path(xsh, domid);
  17.290      if (path == NULL) {
  17.291 -	fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
  17.292 -	return -1;
  17.293 +        fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
  17.294 +        return -1;
  17.295      }
  17.296  
  17.297      pasprintf(&buf, "%s/vm", path);
  17.298      uuid = xs_read(xsh, XBT_NULL, buf, &len);
  17.299      if (uuid == NULL) {
  17.300 -	fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
  17.301 -	free(path);
  17.302 -	return -1;
  17.303 +        fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
  17.304 +        free(path);
  17.305 +        return -1;
  17.306      }
  17.307  
  17.308      pasprintf(&buf, "%s/vncpasswd", uuid);
  17.309      passwd = xs_read(xsh, XBT_NULL, buf, &len);
  17.310      if (passwd == NULL) {
  17.311 -	fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
  17.312 -	free(uuid);
  17.313 -	free(path);
  17.314 -	return rc;
  17.315 +        fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
  17.316 +        free(uuid);
  17.317 +        free(path);
  17.318 +        return rc;
  17.319      }
  17.320  
  17.321      for (i=0; i<len && i<63; i++) {
  17.322 -	vncpasswd[i] = passwd[i];
  17.323 -	passwd[i] = '\0';
  17.324 +        vncpasswd[i] = passwd[i];
  17.325 +        passwd[i] = '\0';
  17.326      }
  17.327      vncpasswd[len] = '\0';
  17.328      pasprintf(&buf, "%s/vncpasswd", uuid);
  17.329      if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) {
  17.330 -	fprintf(logfile, "xs_write() vncpasswd failed.\n");
  17.331 -	rc = -1;
  17.332 +        fprintf(logfile, "xs_write() vncpasswd failed.\n");
  17.333 +        rc = -1;
  17.334      }
  17.335  
  17.336      free(passwd);
  17.337 @@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc
  17.338          goto out;
  17.339  
  17.340      if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1)
  17.341 -	goto out;
  17.342 +        goto out;
  17.343  
  17.344      e = xs_directory(handle, XBT_NULL, buf, num);
  17.345  
  17.346 @@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str
  17.347  
  17.348      buf = get_device_variable_path(devtype, inst, var);
  17.349      if (NULL == buf)
  17.350 -	goto out;
  17.351 +        goto out;
  17.352  
  17.353      value = xs_read(handle, XBT_NULL, buf, &len);
  17.354  
  17.355      free(buf);
  17.356  
  17.357 -out:
  17.358 + out:
  17.359      return value;
  17.360  }
  17.361  
  17.362 @@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char *
  17.363      char *buf = NULL, *path = NULL, *value = NULL;
  17.364  
  17.365      if (xsh == NULL)
  17.366 -	goto out;
  17.367 +        goto out;
  17.368  
  17.369      path = xs_get_domain_path(xsh, domid);
  17.370      if (path == NULL) {
  17.371 -	fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
  17.372 -	goto out;
  17.373 +        fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
  17.374 +        goto out;
  17.375      }
  17.376  
  17.377      pasprintf(&buf, "%s/vm", path);
  17.378      free(path);
  17.379      path = xs_read(xsh, XBT_NULL, buf, NULL);
  17.380      if (path == NULL) {
  17.381 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.382 -	goto out;
  17.383 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.384 +        goto out;
  17.385      }
  17.386  
  17.387      pasprintf(&buf, "%s/%s", path, key);
  17.388      value = xs_read(xsh, XBT_NULL, buf, len);
  17.389      if (value == NULL) {
  17.390 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.391 -	goto out;
  17.392 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.393 +        goto out;
  17.394      }
  17.395  
  17.396   out:
  17.397 @@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k
  17.398      int rc = -1;
  17.399  
  17.400      if (xsh == NULL)
  17.401 -	goto out;
  17.402 +        goto out;
  17.403  
  17.404      path = xs_get_domain_path(xsh, domid);
  17.405      if (path == NULL) {
  17.406 -	fprintf(logfile, "xs_get_domain_path: error\n");
  17.407 -	goto out;
  17.408 +        fprintf(logfile, "xs_get_domain_path: error\n");
  17.409 +        goto out;
  17.410      }
  17.411  
  17.412      pasprintf(&buf, "%s/vm", path);
  17.413      free(path);
  17.414      path = xs_read(xsh, XBT_NULL, buf, NULL);
  17.415      if (path == NULL) {
  17.416 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.417 -	goto out;
  17.418 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  17.419 +        goto out;
  17.420      }
  17.421  
  17.422      pasprintf(&buf, "%s/%s", path, key);
  17.423      rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
  17.424      if (rc) {
  17.425 -	fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
  17.426 -	goto out;
  17.427 +        fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
  17.428 +        goto out;
  17.429      }
  17.430  
  17.431   out:
    18.1 --- a/tools/libfsimage/fat/fat.h	Thu Apr 12 10:26:42 2007 -0600
    18.2 +++ b/tools/libfsimage/fat/fat.h	Thu Apr 12 10:30:12 2007 -0600
    18.3 @@ -84,17 +84,17 @@ struct fat_bpb {
    18.4  #define FAT_DIRENTRY_LENGTH       32
    18.5  
    18.6  #define FAT_DIRENTRY_ATTRIB(entry) \
    18.7 -  (*((unsigned char *) (entry+11)))
    18.8 +  (*((__u8 *) (entry+11)))
    18.9  #define FAT_DIRENTRY_VALID(entry) \
   18.10 -  ( ((*((unsigned char *) entry)) != 0) \
   18.11 -    && ((*((unsigned char *) entry)) != 0xE5) \
   18.12 +  ( ((*((__u8 *) entry)) != 0) \
   18.13 +    && ((*((__u8 *) entry)) != 0xE5) \
   18.14      && !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) )
   18.15  #define FAT_DIRENTRY_FIRST_CLUSTER(entry) \
   18.16 -  ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16))
   18.17 +  ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16))
   18.18  #define FAT_DIRENTRY_FILELENGTH(entry) \
   18.19 -  (*((unsigned long *) (entry+28)))
   18.20 +  (*((__u32 *) (entry+28)))
   18.21  
   18.22  #define FAT_LONGDIR_ID(entry) \
   18.23 -  (*((unsigned char *) (entry)))
   18.24 +  (*((__u8 *) (entry)))
   18.25  #define FAT_LONGDIR_ALIASCHECKSUM(entry) \
   18.26 -  (*((unsigned char *) (entry+13)))
   18.27 +  (*((__u8 *) (entry+13)))
    19.1 --- a/tools/libxc/Makefile	Thu Apr 12 10:26:42 2007 -0600
    19.2 +++ b/tools/libxc/Makefile	Thu Apr 12 10:30:12 2007 -0600
    19.3 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
    19.4  
    19.5  GUEST_SRCS-y :=
    19.6  GUEST_SRCS-y += xg_private.c
    19.7 -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
    19.8 -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
    19.9 +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
   19.10 +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
   19.11  
   19.12  # symlink libelf from xen/common/libelf/
   19.13  LIBELF_SRCS := libelf-tools.c libelf-loader.c
    20.1 --- a/tools/libxc/ia64/xc_ia64_linux_save.c	Thu Apr 12 10:26:42 2007 -0600
    20.2 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c	Thu Apr 12 10:30:12 2007 -0600
    20.3 @@ -134,8 +134,10 @@ retry:
    20.4  }
    20.5  
    20.6  int
    20.7 -xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    20.8 -              uint32_t max_factor, uint32_t flags, int (*suspend)(int))
    20.9 +xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   20.10 +               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   20.11 +               int hvm, void *(*init_qemu_maps)(int, unsigned),
   20.12 +               void (*qemu_flip_buffer)(int, int))
   20.13  {
   20.14      DECLARE_DOMCTL;
   20.15      xc_dominfo_t info;
    21.1 --- a/tools/libxc/xc_domain.c	Thu Apr 12 10:26:42 2007 -0600
    21.2 +++ b/tools/libxc/xc_domain.c	Thu Apr 12 10:30:12 2007 -0600
    21.3 @@ -8,6 +8,7 @@
    21.4  
    21.5  #include "xc_private.h"
    21.6  #include <xen/memory.h>
    21.7 +#include <xen/hvm/hvm_op.h>
    21.8  
    21.9  int xc_domain_create(int xc_handle,
   21.10                       uint32_t ssidref,
   21.11 @@ -657,6 +658,44 @@ int xc_domain_send_trigger(int xc_handle
   21.12      return do_domctl(xc_handle, &domctl);
   21.13  }
   21.14  
   21.15 +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value)
   21.16 +{
   21.17 +    DECLARE_HYPERCALL;
   21.18 +    xen_hvm_param_t arg;
   21.19 +    int rc;
   21.20 +
   21.21 +    hypercall.op     = __HYPERVISOR_hvm_op;
   21.22 +    hypercall.arg[0] = HVMOP_set_param;
   21.23 +    hypercall.arg[1] = (unsigned long)&arg;
   21.24 +    arg.domid = dom;
   21.25 +    arg.index = param;
   21.26 +    arg.value = value;
   21.27 +    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   21.28 +        return -1;
   21.29 +    rc = do_xen_hypercall(handle, &hypercall);
   21.30 +    unlock_pages(&arg, sizeof(arg));
   21.31 +    return rc;
   21.32 +}
   21.33 +
   21.34 +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value)
   21.35 +{
   21.36 +    DECLARE_HYPERCALL;
   21.37 +    xen_hvm_param_t arg;
   21.38 +    int rc;
   21.39 +
   21.40 +    hypercall.op     = __HYPERVISOR_hvm_op;
   21.41 +    hypercall.arg[0] = HVMOP_get_param;
   21.42 +    hypercall.arg[1] = (unsigned long)&arg;
   21.43 +    arg.domid = dom;
   21.44 +    arg.index = param;
   21.45 +    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   21.46 +        return -1;
   21.47 +    rc = do_xen_hypercall(handle, &hypercall);
   21.48 +    unlock_pages(&arg, sizeof(arg));
   21.49 +    *value = arg.value;
   21.50 +    return rc;
   21.51 +}
   21.52 +
   21.53  /*
   21.54   * Local variables:
   21.55   * mode: C
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/libxc/xc_domain_save.c	Thu Apr 12 10:30:12 2007 -0600
    22.3 @@ -0,0 +1,1609 @@
    22.4 +/******************************************************************************
    22.5 + * xc_domain_save.c
    22.6 + *
    22.7 + * Save the state of a running Linux session.
    22.8 + *
    22.9 + * Copyright (c) 2003, K A Fraser.
   22.10 + */
   22.11 +
   22.12 +#include <inttypes.h>
   22.13 +#include <time.h>
   22.14 +#include <stdlib.h>
   22.15 +#include <unistd.h>
   22.16 +#include <sys/time.h>
   22.17 +
   22.18 +#include "xc_private.h"
   22.19 +#include "xc_dom.h"
   22.20 +#include "xg_private.h"
   22.21 +#include "xg_save_restore.h"
   22.22 +
   22.23 +#include <xen/hvm/params.h>
   22.24 +#include <xen/hvm/e820.h>
   22.25 +
   22.26 +/*
   22.27 +** Default values for important tuning parameters. Can override by passing
   22.28 +** non-zero replacement values to xc_domain_save().
   22.29 +**
   22.30 +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   22.31 +**
   22.32 +*/
   22.33 +#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   22.34 +#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
   22.35 +
   22.36 +/* max mfn of the whole machine */
   22.37 +static unsigned long max_mfn;
   22.38 +
   22.39 +/* virtual starting address of the hypervisor */
   22.40 +static unsigned long hvirt_start;
   22.41 +
   22.42 +/* #levels of page tables used by the current guest */
   22.43 +static unsigned int pt_levels;
   22.44 +
   22.45 +/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
   22.46 +static unsigned long *qemu_bitmaps[2];
   22.47 +static int qemu_active;
   22.48 +static int qemu_non_active;
   22.49 +
   22.50 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
   22.51 +static unsigned long p2m_size;
   22.52 +
   22.53 +/* Live mapping of the table mapping each PFN to its current MFN. */
   22.54 +static xen_pfn_t *live_p2m = NULL;
   22.55 +
   22.56 +/* Live mapping of system MFN to PFN table. */
   22.57 +static xen_pfn_t *live_m2p = NULL;
   22.58 +static unsigned long m2p_mfn0;
   22.59 +
   22.60 +/* grep fodder: machine_to_phys */
   22.61 +
   22.62 +#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
   22.63 +
   22.64 +/*
   22.65 + * Returns TRUE if the given machine frame number has a unique mapping
   22.66 + * in the guest's pseudophysical map.
   22.67 + */
   22.68 +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
   22.69 +    (((_mfn) < (max_mfn)) &&                    \
   22.70 +     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
   22.71 +      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
   22.72 +
   22.73 +/* Returns TRUE if MFN is successfully converted to a PFN. */
   22.74 +#define translate_mfn_to_pfn(_pmfn)                             \
   22.75 +({                                                              \
   22.76 +    unsigned long mfn = *(_pmfn);                               \
   22.77 +    int _res = 1;                                               \
   22.78 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
   22.79 +        _res = 0;                                               \
   22.80 +    else                                                        \
   22.81 +        *(_pmfn) = mfn_to_pfn(mfn);                             \
   22.82 +    _res;                                                       \
   22.83 +})
   22.84 +
   22.85 +/*
   22.86 +** During (live) save/migrate, we maintain a number of bitmaps to track
   22.87 +** which pages we have to send, to fixup, and to skip.
   22.88 +*/
   22.89 +
   22.90 +#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   22.91 +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
   22.92 +#define BITMAP_SIZE   (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long))
   22.93 +
   22.94 +#define BITMAP_ENTRY(_nr,_bmap) \
   22.95 +   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   22.96 +
   22.97 +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   22.98 +
   22.99 +static inline int test_bit (int nr, volatile void * addr)
  22.100 +{
  22.101 +    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
  22.102 +}
  22.103 +
  22.104 +static inline void clear_bit (int nr, volatile void * addr)
  22.105 +{
  22.106 +    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
  22.107 +}
  22.108 +
  22.109 +static inline void set_bit ( int nr, volatile void * addr)
  22.110 +{
  22.111 +    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
  22.112 +}
  22.113 +
  22.114 +/* Returns the hamming weight (i.e. the number of bits set) of a 32-bit word */
  22.115 +static inline unsigned int hweight32(unsigned int w)
  22.116 +{
  22.117 +    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
  22.118 +    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
  22.119 +    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
  22.120 +    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
  22.121 +    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
  22.122 +}
  22.123 +
  22.124 +static inline int count_bits ( int nr, volatile void *addr)
  22.125 +{
  22.126 +    int i, count = 0;
  22.127 +    volatile unsigned long *p = (volatile unsigned long *)addr;
  22.128 +    /* We know that the array is padded to unsigned long. */
  22.129 +    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
  22.130 +        count += hweight32(*p);
  22.131 +    return count;
  22.132 +}
  22.133 +
  22.134 +static inline int permute( int i, int nr, int order_nr  )
  22.135 +{
  22.136 +    /* Need a simple permutation function so that we scan pages in a
  22.137 +       pseudo random order, enabling us to get a better estimate of
  22.138 +       the domain's page dirtying rate as we go (there are often
  22.139 +       contiguous ranges of pfns that have similar behaviour, and we
  22.140 +       want to mix them up). */
  22.141 +
  22.142 +    /* e.g. nr->order 15->4 16->4 17->5 */
  22.143 +    /* 512MB domain, 128k pages, order 17 */
  22.144 +
  22.145 +    /*
  22.146 +      QPONMLKJIHGFEDCBA
  22.147 +             QPONMLKJIH
  22.148 +      GFEDCBA
  22.149 +     */
  22.150 +
  22.151 +    /*
  22.152 +      QPONMLKJIHGFEDCBA
  22.153 +                  EDCBA
  22.154 +             QPONM
  22.155 +      LKJIHGF
  22.156 +      */
  22.157 +
  22.158 +    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  22.159 +    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  22.160 +
  22.161 +    return i;
  22.162 +}
  22.163 +
  22.164 +static uint64_t tv_to_us(struct timeval *new)
  22.165 +{
  22.166 +    return (new->tv_sec * 1000000) + new->tv_usec;
  22.167 +}
  22.168 +
  22.169 +static uint64_t llgettimeofday(void)
  22.170 +{
  22.171 +    struct timeval now;
  22.172 +    gettimeofday(&now, NULL);
  22.173 +    return tv_to_us(&now);
  22.174 +}
  22.175 +
  22.176 +static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  22.177 +{
  22.178 +    return (((new->tv_sec - old->tv_sec)*1000000) +
  22.179 +            (new->tv_usec - old->tv_usec));
  22.180 +}
  22.181 +
  22.182 +static int noncached_write(int fd, int live, void *buffer, int len) 
  22.183 +{
  22.184 +    static int write_count = 0;
  22.185 +
  22.186 +    int rc = write(fd,buffer,len);
  22.187 +
  22.188 +    write_count += len;
  22.189 +    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
  22.190 +    {
  22.191 +        /* Time to discard cache - don't care if this fails */
  22.192 +        discard_file_cache(fd, 0 /* no flush */);
  22.193 +        write_count = 0;
  22.194 +    }
  22.195 +
  22.196 +    return rc;
  22.197 +}
  22.198 +
  22.199 +#ifdef ADAPTIVE_SAVE
  22.200 +
  22.201 +/*
  22.202 +** We control the rate at which we transmit (or save) to minimize impact
  22.203 +** on running domains (including the target if we're doing live migrate).
  22.204 +*/
  22.205 +
  22.206 +#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
  22.207 +#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
  22.208 +
  22.209 +/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
  22.210 +#define RATE_TO_BTU      781250
  22.211 +
  22.212 +/* Amount in bytes we allow ourselves to send in a burst */
  22.213 +#define BURST_BUDGET (100*1024)
  22.214 +
  22.215 +/* We keep track of the current and previous transmission rate */
  22.216 +static int mbit_rate, ombit_rate = 0;
  22.217 +
  22.218 +/* Have we reached the maximum transmission rate? */
  22.219 +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
  22.220 +
  22.221 +static inline void initialize_mbit_rate()
  22.222 +{
  22.223 +    mbit_rate = START_MBIT_RATE;
  22.224 +}
  22.225 +
  22.226 +static int ratewrite(int io_fd, int live, void *buf, int n)
  22.227 +{
  22.228 +    static int budget = 0;
  22.229 +    static int burst_time_us = -1;
  22.230 +    static struct timeval last_put = { 0 };
  22.231 +    struct timeval now;
  22.232 +    struct timespec delay;
  22.233 +    long long delta;
  22.234 +
  22.235 +    if ( START_MBIT_RATE == 0 )
  22.236 +        return noncached_write(io_fd, live, buf, n);
  22.237 +
  22.238 +    budget -= n;
  22.239 +    if ( budget < 0 )
  22.240 +    {
  22.241 +        if ( mbit_rate != ombit_rate )
  22.242 +        {
  22.243 +            burst_time_us = RATE_TO_BTU / mbit_rate;
  22.244 +            ombit_rate = mbit_rate;
  22.245 +            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
  22.246 +                    mbit_rate, BURST_BUDGET, burst_time_us);
  22.247 +        }
  22.248 +        if ( last_put.tv_sec == 0 )
  22.249 +        {
  22.250 +            budget += BURST_BUDGET;
  22.251 +            gettimeofday(&last_put, NULL);
  22.252 +        }
  22.253 +        else
  22.254 +        {
  22.255 +            while ( budget < 0 )
  22.256 +            {
  22.257 +                gettimeofday(&now, NULL);
  22.258 +                delta = tv_delta(&now, &last_put);
  22.259 +                while ( delta > burst_time_us )
  22.260 +                {
  22.261 +                    budget += BURST_BUDGET;
  22.262 +                    last_put.tv_usec += burst_time_us;
  22.263 +                    if ( last_put.tv_usec > 1000000 )
  22.264 +                    {
  22.265 +                        last_put.tv_usec -= 1000000;
  22.266 +                        last_put.tv_sec++;
  22.267 +                    }
  22.268 +                    delta -= burst_time_us;
  22.269 +                }
  22.270 +                if ( budget > 0 )
  22.271 +                    break;
  22.272 +                delay.tv_sec = 0;
  22.273 +                delay.tv_nsec = 1000 * (burst_time_us - delta);
  22.274 +                while ( delay.tv_nsec > 0 )
  22.275 +                    if ( nanosleep(&delay, &delay) == 0 )
  22.276 +                        break;
  22.277 +            }
  22.278 +        }
  22.279 +    }
  22.280 +    return noncached_write(io_fd, live, buf, n);
  22.281 +}
  22.282 +
  22.283 +#else /* ! ADAPTIVE_SAVE */
  22.284 +
  22.285 +#define RATE_IS_MAX() (0)
  22.286 +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
  22.287 +#define initialize_mbit_rate()
  22.288 +
  22.289 +#endif
  22.290 +
  22.291 +static inline ssize_t write_exact(int fd, void *buf, size_t count)
  22.292 +{
  22.293 +    return (write(fd, buf, count) == count);
  22.294 +}
  22.295 +
  22.296 +static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  22.297 +                       xc_shadow_op_stats_t *stats, int print)
  22.298 +{
  22.299 +    static struct timeval wall_last;
  22.300 +    static long long      d0_cpu_last;
  22.301 +    static long long      d1_cpu_last;
  22.302 +
  22.303 +    struct timeval        wall_now;
  22.304 +    long long             wall_delta;
  22.305 +    long long             d0_cpu_now, d0_cpu_delta;
  22.306 +    long long             d1_cpu_now, d1_cpu_delta;
  22.307 +
  22.308 +    gettimeofday(&wall_now, NULL);
  22.309 +
  22.310 +    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
  22.311 +    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
  22.312 +
  22.313 +    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  22.314 +        DPRINTF("ARRHHH!!\n");
  22.315 +
  22.316 +    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  22.317 +    if ( wall_delta == 0 )
  22.318 +        wall_delta = 1;
  22.319 +
  22.320 +    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
  22.321 +    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
  22.322 +
  22.323 +    if ( print )
  22.324 +        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  22.325 +                "dirtied %dMb/s %" PRId32 " pages\n",
  22.326 +                wall_delta,
  22.327 +                (int)((d0_cpu_delta*100)/wall_delta),
  22.328 +                (int)((d1_cpu_delta*100)/wall_delta),
  22.329 +                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  22.330 +                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  22.331 +                stats->dirty_count);
  22.332 +
  22.333 +#ifdef ADAPTIVE_SAVE
  22.334 +    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
  22.335 +    {
  22.336 +        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
  22.337 +            + 50;
  22.338 +        if ( mbit_rate > MAX_MBIT_RATE )
  22.339 +            mbit_rate = MAX_MBIT_RATE;
  22.340 +    }
  22.341 +#endif
  22.342 +
  22.343 +    d0_cpu_last = d0_cpu_now;
  22.344 +    d1_cpu_last = d1_cpu_now;
  22.345 +    wall_last   = wall_now;
  22.346 +
  22.347 +    return 0;
  22.348 +}
  22.349 +
  22.350 +
  22.351 +static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
  22.352 +                          unsigned long *arr, int runs)
  22.353 +{
  22.354 +    long long start, now;
  22.355 +    xc_shadow_op_stats_t stats;
  22.356 +    int j;
  22.357 +
  22.358 +    start = llgettimeofday();
  22.359 +
  22.360 +    for ( j = 0; j < runs; j++ )
  22.361 +    {
  22.362 +        int i;
  22.363 +
  22.364 +        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  22.365 +                          arr, p2m_size, NULL, 0, NULL);
  22.366 +        DPRINTF("#Flush\n");
  22.367 +        for ( i = 0; i < 40; i++ )
  22.368 +        {
  22.369 +            usleep(50000);
  22.370 +            now = llgettimeofday();
  22.371 +            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  22.372 +                              NULL, 0, NULL, 0, &stats);
  22.373 +            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  22.374 +                    ((now-start)+500)/1000,
  22.375 +                    stats.fault_count, stats.dirty_count);
  22.376 +        }
  22.377 +    }
  22.378 +
  22.379 +    return -1;
  22.380 +}
  22.381 +
  22.382 +
  22.383 +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  22.384 +                             int dom, xc_dominfo_t *info,
  22.385 +                             vcpu_guest_context_t *ctxt)
  22.386 +{
  22.387 +    int i = 0;
  22.388 +
  22.389 +    if ( !(*suspend)(dom) )
  22.390 +    {
  22.391 +        ERROR("Suspend request failed");
  22.392 +        return -1;
  22.393 +    }
  22.394 +
  22.395 + retry:
  22.396 +
  22.397 +    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  22.398 +    {
  22.399 +        ERROR("Could not get domain info");
  22.400 +        return -1;
  22.401 +    }
  22.402 +
  22.403 +    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
  22.404 +        ERROR("Could not get vcpu context");
  22.405 +
  22.406 +
  22.407 +    if ( info->dying )
  22.408 +    {
  22.409 +        ERROR("domain is dying");
  22.410 +        return -1;
  22.411 +    }
  22.412 +
  22.413 +    if ( info->crashed )
  22.414 +    {
  22.415 +        ERROR("domain has crashed");
  22.416 +        return -1;
  22.417 +    }
  22.418 +
  22.419 +    if ( info->shutdown )
  22.420 +    {
  22.421 +        switch ( info->shutdown_reason )
  22.422 +        {
  22.423 +        case SHUTDOWN_poweroff:
  22.424 +        case SHUTDOWN_reboot:
  22.425 +            ERROR("domain has shut down");
  22.426 +            return -1;
  22.427 +        case SHUTDOWN_suspend:
  22.428 +            return 0;
  22.429 +        case SHUTDOWN_crash:
  22.430 +            ERROR("domain has crashed");
  22.431 +            return -1;
  22.432 +        }
  22.433 +    }
  22.434 +
  22.435 +    if ( info->paused )
  22.436 +    {
  22.437 +        /* Try unpausing domain, wait, and retest. */
  22.438 +        xc_domain_unpause( xc_handle, dom );
  22.439 +        ERROR("Domain was paused. Wait and re-test.");
  22.440 +        usleep(10000); /* 10ms */
  22.441 +        goto retry;
  22.442 +    }
  22.443 +
  22.444 +    if ( ++i < 100 )
  22.445 +    {
  22.446 +        ERROR("Retry suspend domain");
  22.447 +        usleep(10000); /* 10ms */
  22.448 +        goto retry;
  22.449 +    }
  22.450 +
  22.451 +    ERROR("Unable to suspend domain.");
  22.452 +
  22.453 +    return -1;
  22.454 +}
  22.455 +
  22.456 +/*
  22.457 +** Map the top-level page of MFNs from the guest. The guest might not have
  22.458 +** finished resuming from a previous restore operation, so we wait a while for
  22.459 +** it to update the MFN to a reasonable value.
  22.460 +*/
  22.461 +static void *map_frame_list_list(int xc_handle, uint32_t dom,
  22.462 +                                 shared_info_t *shinfo)
  22.463 +{
  22.464 +    int count = 100;
  22.465 +    void *p;
  22.466 +
  22.467 +    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
  22.468 +        usleep(10000);
  22.469 +
  22.470 +    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
  22.471 +    {
  22.472 +        ERROR("Timed out waiting for frame list updated.");
  22.473 +        return NULL;
  22.474 +    }
  22.475 +
  22.476 +    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
  22.477 +                             shinfo->arch.pfn_to_mfn_frame_list_list);
  22.478 +    if ( p == NULL )
  22.479 +        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
  22.480 +
  22.481 +    return p;
  22.482 +}
  22.483 +
  22.484 +/*
  22.485 +** During transfer (or in the state file), all page-table pages must be
  22.486 +** converted into a 'canonical' form where references to actual mfns
  22.487 +** are replaced with references to the corresponding pfns.
  22.488 +**
  22.489 +** This function performs the appropriate conversion, taking into account
  22.490 +** which entries do not require canonicalization (in particular, those
  22.491 +** entries which map the virtual address reserved for the hypervisor).
  22.492 +** Returns 1 if a present entry named an mfn outside the pseudophys map (it is emitted with pfn 0), else 0. */
  22.493 +static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
  22.494 +                           const void *spage, void *dpage) /* spage: table to convert (only read); dpage: receives the converted table */
  22.495 +{
  22.496 +
  22.497 +    int i, pte_last, xen_start, xen_end, race = 0; 
  22.498 +    uint64_t pte;
  22.499 +
  22.500 +    /*
  22.501 +    ** We need to determine which entries in this page table hold
  22.502 +    ** reserved hypervisor mappings. This depends on the current
  22.503 +    ** page table type as well as the number of paging levels.
  22.504 +    ** Both bounds start out equal to pte_last (the entry count), i.e. an empty range. */
  22.505 +    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
  22.506 +
  22.507 +    if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  22.508 +        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
  22.509 +
  22.510 +    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
  22.511 +        xen_start = L3_PAGETABLE_ENTRIES_PAE;
  22.512 +
  22.513 +    /*
  22.514 +    ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
  22.515 +    ** We can spot this by looking for the guest linear mapping which
  22.516 +    ** Xen always ensures is present in that L2. Guests must ensure
  22.517 +    ** that this check will fail for other L2s.
  22.518 +    */
  22.519 +    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  22.520 +    {
  22.521 +        int hstart;
  22.522 +        uint64_t he;
  22.523 +
  22.524 +        hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  22.525 +        he = ((const uint64_t *) spage)[hstart];
  22.526 +
  22.527 +        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  22.528 +        {
  22.529 +            /* hvirt starts with xen stuff... */
  22.530 +            xen_start = hstart;
  22.531 +        }
  22.532 +        else if ( hvirt_start != 0xf5800000 )
  22.533 +        {
  22.534 +            /* old L2s from before hole was shrunk... */
  22.535 +            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  22.536 +            he = ((const uint64_t *) spage)[hstart];
  22.537 +            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  22.538 +                xen_start = hstart;
  22.539 +        }
  22.540 +    }
  22.541 +
  22.542 +    if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
  22.543 +    {
  22.544 +        /*
  22.545 +        ** XXX SMH: should compute these from hvirt_start (which we have)
  22.546 +        ** and hvirt_end (which we don't)
  22.547 +        */
  22.548 +        xen_start = 256;
  22.549 +        xen_end   = 272;
  22.550 +    }
  22.551 +
  22.552 +    /* Now iterate through the page table, canonicalizing each PTE */
  22.553 +    for (i = 0; i < pte_last; i++ )
  22.554 +    {
  22.555 +        unsigned long pfn, mfn; /* NOTE(review): this 'pfn' shadows the function parameter, which is never read */
  22.556 +
  22.557 +        if ( pt_levels == 2 )
  22.558 +            pte = ((const uint32_t*)spage)[i];
  22.559 +        else
  22.560 +            pte = ((const uint64_t*)spage)[i];
  22.561 +
  22.562 +        if ( (i >= xen_start) && (i < xen_end) )
  22.563 +            pte = 0; /* slot in the hypervisor-reserved range: emit an empty entry */
  22.564 +
  22.565 +        if ( pte & _PAGE_PRESENT )
  22.566 +        {
  22.567 +            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
  22.568 +            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
  22.569 +            {
  22.570 +                /* This will happen if the type info is stale which
  22.571 +                   is quite feasible under live migration */
  22.572 +                pfn  = 0;  /* zap it - we'll retransmit this page later */
  22.573 +                race = 1;  /* inform the caller of race; fatal if !live */ 
  22.574 +            }
  22.575 +            else
  22.576 +                pfn = mfn_to_pfn(mfn);
  22.577 +
  22.578 +            pte &= ~MADDR_MASK_X86; /* clear the bits selected by MADDR_MASK_X86, then insert the pfn below */
  22.579 +            pte |= (uint64_t)pfn << PAGE_SHIFT;
  22.580 +
  22.581 +            /*
  22.582 +             * PAE guest L3Es can contain these flags when running on
  22.583 +             * a 64bit hypervisor. We zap these here to avoid any
  22.584 +             * surprise at restore time...
  22.585 +             */
  22.586 +            if ( (pt_levels == 3) &&
  22.587 +                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
  22.588 +                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
  22.589 +                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
  22.590 +        }
  22.591 +
  22.592 +        if ( pt_levels == 2 )
  22.593 +            ((uint32_t*)dpage)[i] = pte; /* 2-level tables use 4-byte entries, so the 64-bit pte is narrowed here */
  22.594 +        else
  22.595 +            ((uint64_t*)dpage)[i] = pte;
  22.596 +    }
  22.597 +
  22.598 +    return race;
  22.599 +}
  22.600 +
  22.601 +static xen_pfn_t *xc_map_m2p(int xc_handle, /* Map the M2P table for max_mfn frames into this process; returns the mapping, or NULL on failure */
  22.602 +                                 unsigned long max_mfn,
  22.603 +                                 int prot) /* prot: protection flags passed straight to mmap() */
  22.604 +{
  22.605 +    struct xen_machphys_mfn_list xmml;
  22.606 +    privcmd_mmap_entry_t *entries;
  22.607 +    unsigned long m2p_chunks, m2p_size;
  22.608 +    xen_pfn_t *m2p;
  22.609 +    xen_pfn_t *extent_start;
  22.610 +    int i, rc;
  22.611 +
  22.612 +    m2p_size   = M2P_SIZE(max_mfn);
  22.613 +    m2p_chunks = M2P_CHUNKS(max_mfn);
  22.614 +
  22.615 +    xmml.max_extents = m2p_chunks;
  22.616 +    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
  22.617 +    {
  22.618 +        ERROR("failed to allocate space for m2p mfns");
  22.619 +        return NULL;
  22.620 +    }
  22.621 +    set_xen_guest_handle(xmml.extent_start, extent_start);
  22.622 +
  22.623 +    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || /* ask Xen for the first mfn of each M2P chunk */
  22.624 +         (xmml.nr_extents != m2p_chunks) )
  22.625 +    {
  22.626 +        ERROR("xc_get_m2p_mfns");
  22.627 +        return NULL; /* NOTE(review): extent_start is not freed on this path */
  22.628 +    }
  22.629 +
  22.630 +    if ( (m2p = mmap(NULL, m2p_size, prot, /* reserve the address range; the chunks are mapped into it below */
  22.631 +                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
  22.632 +    {
  22.633 +        ERROR("failed to mmap m2p");
  22.634 +        return NULL; /* NOTE(review): extent_start is not freed on this path */
  22.635 +    }
  22.636 +
  22.637 +    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
  22.638 +    {
  22.639 +        ERROR("failed to allocate space for mmap entries");
  22.640 +        return NULL; /* NOTE(review): extent_start and the m2p mapping are not released on this path */
  22.641 +    }
  22.642 +
  22.643 +    for ( i = 0; i < m2p_chunks; i++ ) /* one mmap entry per chunk, laid out back to back inside the m2p range */
  22.644 +    {
  22.645 +        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
  22.646 +        entries[i].mfn = extent_start[i];
  22.647 +        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
  22.648 +    }
  22.649 +
  22.650 +    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
  22.651 +                                     entries, m2p_chunks)) < 0 )
  22.652 +    {
  22.653 +        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
  22.654 +        return NULL; /* NOTE(review): extent_start, entries and the m2p mapping are not released on this path */
  22.655 +    }
  22.656 +
  22.657 +    m2p_mfn0 = entries[0].mfn; /* side effect: records the first chunk's mfn in the file-level m2p_mfn0 */
  22.658 +
  22.659 +    free(extent_start);
  22.660 +    free(entries);
  22.661 +
  22.662 +    return m2p;
  22.663 +}
  22.664 +
  22.665 +
  22.666 +static xen_pfn_t *map_and_save_p2m_table(int xc_handle, /* Map the guest's live P2M table and write its canonicalised frame list to io_fd */
  22.667 +                                         int io_fd, 
  22.668 +                                         uint32_t dom,
  22.669 +                                         vcpu_guest_context_t *ctxt,
  22.670 +                                         unsigned long p2m_size,
  22.671 +                                         shared_info_t *live_shinfo) /* returns the mapped live P2M table, or NULL on any failure */
  22.672 +{
  22.673 +    /* Double and single indirect references to the live P2M table */
  22.674 +    xen_pfn_t *live_p2m_frame_list_list = NULL;
  22.675 +    xen_pfn_t *live_p2m_frame_list = NULL;
  22.676 +
  22.677 +    /* A copy of the pfn-to-mfn table frame list. */
  22.678 +    xen_pfn_t *p2m_frame_list = NULL;
  22.679 +
  22.680 +    /* The mapping of the live p2m table itself */
  22.681 +    xen_pfn_t *p2m = NULL;
  22.682 +
  22.683 +    int i, success = 0;
  22.684 +
  22.685 +    live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
  22.686 +                                                   live_shinfo);
  22.687 +    if ( !live_p2m_frame_list_list )
  22.688 +        goto out;
  22.689 +
  22.690 +    live_p2m_frame_list =
  22.691 +        xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  22.692 +                             live_p2m_frame_list_list,
  22.693 +                             P2M_FLL_ENTRIES);
  22.694 +    if ( !live_p2m_frame_list )
  22.695 +    {
  22.696 +        ERROR("Couldn't map p2m_frame_list");
  22.697 +        goto out;
  22.698 +    }
  22.699 +
  22.700 +
  22.701 +    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
  22.702 +       the guest must not change which frames are used for this purpose.
  22.703 +       (it's not clear why it would want to change them, and we'll be OK
  22.704 +       from a safety POV anyhow.) */
  22.705 +
  22.706 +    p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  22.707 +                               live_p2m_frame_list,
  22.708 +                               P2M_FL_ENTRIES);
  22.709 +    if ( !p2m )
  22.710 +    {
  22.711 +        ERROR("Couldn't map p2m table");
  22.712 +        goto out;
  22.713 +    }
  22.714 +    live_p2m = p2m; /* So that translation macros will work */
  22.715 +    
  22.716 +    /* Get a local copy of the live_P2M_frame_list */
  22.717 +    if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
  22.718 +    {
  22.719 +        ERROR("Couldn't allocate p2m_frame_list array");
  22.720 +        goto out;
  22.721 +    }
  22.722 +    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
  22.723 +
  22.724 +    /* Canonicalise the pfn-to-mfn table frame-number list. */
  22.725 +    for ( i = 0; i < p2m_size; i += fpp )
  22.726 +    {
  22.727 +        if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
  22.728 +        {
  22.729 +            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
  22.730 +            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
  22.731 +                  (uint64_t)p2m_frame_list[i/fpp]);
  22.732 +            goto out;
  22.733 +        }
  22.734 +    }
  22.735 +
  22.736 +    /*
  22.737 +     * Write an extended-info structure to inform the restore code that
  22.738 +     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
  22.739 +     * slow paths in the restore code.
  22.740 +     */
  22.741 +    if ( (pt_levels == 3) &&
  22.742 +         (ctxt->vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
  22.743 +    {
  22.744 +        unsigned long signature = ~0UL;
  22.745 +        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8; /* 8 = 4 chunk-signature bytes + the 4-byte chunk_sz written below */
  22.746 +        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
  22.747 +        char chunk_sig[]  = "vcpu";
  22.748 +        if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
  22.749 +             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
  22.750 +             !write_exact(io_fd, &chunk_sig, 4) || /* only the 4 signature characters are written, not the terminating NUL */
  22.751 +             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
  22.752 +             !write_exact(io_fd, ctxt,       sizeof(*ctxt)) )
  22.753 +        {
  22.754 +            ERROR("write: extended info");
  22.755 +            goto out;
  22.756 +        }
  22.757 +    }
  22.758 +
  22.759 +    if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
  22.760 +    {
  22.761 +        ERROR("write: p2m_frame_list");
  22.762 +        goto out;
  22.763 +    }    
  22.764 +
  22.765 +    success = 1;
  22.766 +
  22.767 + out:
  22.768 +    
  22.769 +    if ( !success && p2m ) /* on success the p2m mapping is kept and handed to the caller */
  22.770 +        munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); /* NOTE(review): the mapping was made with P2M_FL_ENTRIES pages; presumably the same size as this - confirm */
  22.771 +
  22.772 +    if ( live_p2m_frame_list_list )
  22.773 +        munmap(live_p2m_frame_list_list, PAGE_SIZE);
  22.774 +
  22.775 +    if ( live_p2m_frame_list )
  22.776 +        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
  22.777 +
  22.778 +    if ( p2m_frame_list ) 
  22.779 +        free(p2m_frame_list); /* the NULL guard above is redundant: free(NULL) is a no-op */
  22.780 +
  22.781 +    return success ? p2m : NULL;
  22.782 +}
  22.783 +
  22.784 +
  22.785 +
  22.786 +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  22.787 +                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
  22.788 +                   int hvm, void *(*init_qemu_maps)(int, unsigned), 
  22.789 +                   void (*qemu_flip_buffer)(int, int))
  22.790 +{
  22.791 +    xc_dominfo_t info;
  22.792 +
  22.793 +    int rc = 1, i, j, last_iter, iter = 0;
  22.794 +    int live  = (flags & XCFLAGS_LIVE);
  22.795 +    int debug = (flags & XCFLAGS_DEBUG);
  22.796 +    int race = 0, sent_last_iter, skip_this_iter;
  22.797 +
  22.798 +    /* The new domain's shared-info frame number. */
  22.799 +    unsigned long shared_info_frame;
  22.800 +
  22.801 +    /* A copy of the CPU context of the guest. */
  22.802 +    vcpu_guest_context_t ctxt;
  22.803 +
  22.804 +    /* A table containing the type of each PFN (/not/ MFN!). */
  22.805 +    unsigned long *pfn_type = NULL;
  22.806 +    unsigned long *pfn_batch = NULL;
  22.807 +
  22.808 +    /* A copy of one frame of guest memory. */
  22.809 +    char page[PAGE_SIZE];
  22.810 +
  22.811 +    /* Live mapping of shared info structure */
  22.812 +    shared_info_t *live_shinfo = NULL;
  22.813 +
  22.814 +    /* base of the region in which domain memory is mapped */
  22.815 +    unsigned char *region_base = NULL;
  22.816 +
  22.817 +    /* power of 2 order of p2m_size */
  22.818 +    int order_nr;
  22.819 +
  22.820 +    /* bitmap of pages:
  22.821 +       - that should be sent this iteration (unless later marked as skip);
  22.822 +       - to skip this iteration because already dirty;
  22.823 +       - to fixup by sending at the end if not already resent; */
  22.824 +    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
  22.825 +
  22.826 +    xc_shadow_op_stats_t stats;
  22.827 +
  22.828 +    unsigned long needed_to_fix = 0;
  22.829 +    unsigned long total_sent    = 0;
  22.830 +
  22.831 +    uint64_t vcpumap = 1ULL;
  22.832 +
  22.833 +    /* HVM: a buffer for holding HVM context */
  22.834 +    uint32_t hvm_buf_size = 0;
  22.835 +    uint8_t *hvm_buf = NULL;
  22.836 +
  22.837 +    /* HVM: magic frames for ioreqs and xenstore comms. */
  22.838 +    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
  22.839 +
  22.840 +    /* If no explicit control parameters given, use defaults */
  22.841 +    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  22.842 +    max_factor = max_factor ? : DEF_MAX_FACTOR;
  22.843 +
  22.844 +    initialize_mbit_rate();
  22.845 +
  22.846 +    if ( !get_platform_info(xc_handle, dom,
  22.847 +                            &max_mfn, &hvirt_start, &pt_levels) )
  22.848 +    {
  22.849 +        ERROR("Unable to get platform info.");
  22.850 +        return 1;
  22.851 +    }
  22.852 +
  22.853 +    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  22.854 +    {
  22.855 +        ERROR("Could not get domain info");
  22.856 +        return 1;
  22.857 +    }
  22.858 +
  22.859 +    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  22.860 +    {
  22.861 +        ERROR("Could not get vcpu context");
  22.862 +        goto out;
  22.863 +    }
  22.864 +    shared_info_frame = info.shared_info_frame;
  22.865 +
  22.866 +    /* Map the shared info frame */
  22.867 +    if ( !hvm )
  22.868 +    {
  22.869 +        live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  22.870 +                                           PROT_READ, shared_info_frame);
  22.871 +        if ( !live_shinfo )
  22.872 +        {
  22.873 +            ERROR("Couldn't map live_shinfo");
  22.874 +            goto out;
  22.875 +        }
  22.876 +    }
  22.877 +
  22.878 +    /* Get the size of the P2M table */
  22.879 +    p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
  22.880 +
  22.881 +    /* Domain is still running at this point */
  22.882 +    if ( live )
  22.883 +    {
  22.884 +        /* Live suspend. Enable log-dirty mode. */
  22.885 +        if ( xc_shadow_control(xc_handle, dom,
  22.886 +                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  22.887 +                               NULL, 0, NULL, 0, NULL) < 0 )
  22.888 +        {
  22.889 +            ERROR("Couldn't enable shadow mode");
  22.890 +            goto out;
  22.891 +        }
  22.892 +
  22.893 +        if ( hvm )
  22.894 +        {
  22.895 +            /* Get qemu-dm logging dirty pages too */
  22.896 +            void *seg = init_qemu_maps(dom, BITMAP_SIZE);
  22.897 +            qemu_bitmaps[0] = seg;
  22.898 +            qemu_bitmaps[1] = seg + BITMAP_SIZE;
  22.899 +            qemu_active = 0;
  22.900 +            qemu_non_active = 1;
  22.901 +        }
  22.902 +    }
  22.903 +    else
  22.904 +    {
  22.905 +        /* This is a non-live suspend. Suspend the domain .*/
  22.906 +        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
  22.907 +        {
  22.908 +            ERROR("Domain appears not to have suspended");
  22.909 +            goto out;
  22.910 +        }
  22.911 +    }
  22.912 +
  22.913 +    last_iter = !live;
  22.914 +
  22.915 +    /* pretend we sent all the pages last iteration */
  22.916 +    sent_last_iter = p2m_size;
  22.917 +
  22.918 +    /* calculate the power of 2 order of p2m_size, e.g.
  22.919 +       15->4 16->4 17->5 */
  22.920 +    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  22.921 +        continue;
  22.922 +
  22.923 +    /* Setup to_send / to_fix and to_skip bitmaps */
  22.924 +    to_send = malloc(BITMAP_SIZE);
  22.925 +    to_fix  = calloc(1, BITMAP_SIZE);
  22.926 +    to_skip = malloc(BITMAP_SIZE);
  22.927 +
  22.928 +    if ( !to_send || !to_fix || !to_skip )
  22.929 +    {
  22.930 +        ERROR("Couldn't allocate to_send array");
  22.931 +        goto out;
  22.932 +    }
  22.933 +
  22.934 +    memset(to_send, 0xff, BITMAP_SIZE);
  22.935 +
  22.936 +    if ( lock_pages(to_send, BITMAP_SIZE) )
  22.937 +    {
  22.938 +        ERROR("Unable to lock to_send");
  22.939 +        return 1;
  22.940 +    }
  22.941 +
  22.942 +    /* (to fix is local only) */
  22.943 +    if ( lock_pages(to_skip, BITMAP_SIZE) )
  22.944 +    {
  22.945 +        ERROR("Unable to lock to_skip");
  22.946 +        return 1;
  22.947 +    }
  22.948 +
  22.949 +    if ( hvm ) 
  22.950 +    {
  22.951 +        /* Need another buffer for HVM context */
  22.952 +        hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
  22.953 +        if ( hvm_buf_size == -1 )
  22.954 +        {
  22.955 +            ERROR("Couldn't get HVM context size from Xen");
  22.956 +            goto out;
  22.957 +        }
  22.958 +        hvm_buf = malloc(hvm_buf_size);
  22.959 +        if ( !hvm_buf )
  22.960 +        {
  22.961 +            ERROR("Couldn't allocate memory");
  22.962 +            goto out;
  22.963 +        }
  22.964 +    }
  22.965 +
  22.966 +    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
  22.967 +
  22.968 +    /* We want zeroed memory so use calloc rather than malloc. */
  22.969 +    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
  22.970 +    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  22.971 +    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
  22.972 +    {
  22.973 +        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
  22.974 +        errno = ENOMEM;
  22.975 +        goto out;
  22.976 +    }
  22.977 +
  22.978 +    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
  22.979 +    {
  22.980 +        ERROR("Unable to lock");
  22.981 +        goto out;
  22.982 +    }
  22.983 +
  22.984 +    /* Setup the mfn_to_pfn table mapping */
  22.985 +    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
  22.986 +    {
  22.987 +        ERROR("Failed to map live M2P table");
  22.988 +        goto out;
  22.989 +    }
  22.990 +
  22.991 +    /* Start writing out the saved-domain record. */
  22.992 +    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
  22.993 +    {
  22.994 +        ERROR("write: p2m_size");
  22.995 +        goto out;
  22.996 +    }
  22.997 +
  22.998 +    if ( !hvm )
  22.999 +    {
 22.1000 +        int err = 0;
 22.1001 +        unsigned long mfn;
 22.1002 +
 22.1003 +        /* Map the P2M table, and write the list of P2M frames */
 22.1004 +        live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom, 
 22.1005 +                                          &ctxt, p2m_size, live_shinfo);
 22.1006 +        if ( live_p2m == NULL )
 22.1007 +        {
 22.1008 +            ERROR("Failed to map/save the p2m frame list");
 22.1009 +            goto out;
 22.1010 +        }
 22.1011 +
 22.1012 +        /*
 22.1013 +         * Quick belt and braces sanity check.
 22.1014 +         */
 22.1015 +        
 22.1016 +        for ( i = 0; i < p2m_size; i++ )
 22.1017 +        {
 22.1018 +            mfn = live_p2m[i];
 22.1019 +            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
 22.1020 +            {
 22.1021 +                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
 22.1022 +                        mfn, mfn_to_pfn(mfn));
 22.1023 +                err++;
 22.1024 +            }
 22.1025 +        }
 22.1026 +        DPRINTF("Had %d unexplained entries in p2m table\n", err);
 22.1027 +    }
 22.1028 +
 22.1029 +    print_stats(xc_handle, dom, 0, &stats, 0);
 22.1030 +
 22.1031 +    /* Now write out each data page, canonicalising page tables as we go... */
 22.1032 +    for ( ; ; )
 22.1033 +    {
 22.1034 +        unsigned int prev_pc, sent_this_iter, N, batch;
 22.1035 +
 22.1036 +        iter++;
 22.1037 +        sent_this_iter = 0;
 22.1038 +        skip_this_iter = 0;
 22.1039 +        prev_pc = 0;
 22.1040 +        N = 0;
 22.1041 +
 22.1042 +        DPRINTF("Saving memory pages: iter %d   0%%", iter);
 22.1043 +
 22.1044 +        while ( N < p2m_size )
 22.1045 +        {
 22.1046 +            unsigned int this_pc = (N * 100) / p2m_size;
 22.1047 +            int rc;
 22.1048 +
 22.1049 +            if ( (this_pc - prev_pc) >= 5 )
 22.1050 +            {
 22.1051 +                DPRINTF("\b\b\b\b%3d%%", this_pc);
 22.1052 +                prev_pc = this_pc;
 22.1053 +            }
 22.1054 +
 22.1055 +            if ( !last_iter )
 22.1056 +            {
 22.1057 +                /* Slightly wasteful to peek the whole array every time,
 22.1058 +                   but this is fast enough for the moment. */
 22.1059 +                rc = xc_shadow_control(
 22.1060 +                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
 22.1061 +                    p2m_size, NULL, 0, NULL);
 22.1062 +                if ( rc != p2m_size )
 22.1063 +                {
 22.1064 +                    ERROR("Error peeking shadow bitmap");
 22.1065 +                    goto out;
 22.1066 +                }
 22.1067 +            }
 22.1068 +
 22.1069 +            /* load pfn_type[] with the mfn of all the pages we're doing in
 22.1070 +               this batch. */
 22.1071 +            for  ( batch = 0;
 22.1072 +                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
 22.1073 +                   N++ )
 22.1074 +            {
 22.1075 +                int n = permute(N, p2m_size, order_nr);
 22.1076 +
 22.1077 +                if ( debug )
 22.1078 +                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
 22.1079 +                            iter, (unsigned long)n, hvm ? 0 : live_p2m[n],
 22.1080 +                            test_bit(n, to_send),
 22.1081 +                            hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF));
 22.1082 +
 22.1083 +                if ( !last_iter &&
 22.1084 +                     test_bit(n, to_send) &&
 22.1085 +                     test_bit(n, to_skip) )
 22.1086 +                    skip_this_iter++; /* stats keeping */
 22.1087 +
 22.1088 +                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
 22.1089 +                       (test_bit(n, to_send) && last_iter) ||
 22.1090 +                       (test_bit(n, to_fix)  && last_iter)) )
 22.1091 +                    continue;
 22.1092 +
 22.1093 +                /* Skip PFNs that aren't really there */
 22.1094 +                if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */
 22.1095 +                             || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) 
 22.1096 +                                 && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ )
 22.1097 +                    continue;
 22.1098 +
 22.1099 +                /*
 22.1100 +                ** we get here if:
 22.1101 +                **  1. page is marked to_send & hasn't already been re-dirtied
 22.1102 +                **  2. (ignore to_skip in last iteration)
 22.1103 +                **  3. add in pages that still need fixup (net bufs)
 22.1104 +                */
 22.1105 +
 22.1106 +                pfn_batch[batch] = n;
 22.1107 +
 22.1108 +                /* Hypercall interfaces operate in PFNs for HVM guests
 22.1109 +                * and MFNs for PV guests */
 22.1110 +                if ( hvm ) 
 22.1111 +                    pfn_type[batch] = n;
 22.1112 +                else
 22.1113 +                    pfn_type[batch] = live_p2m[n];
 22.1114 +                    
 22.1115 +                if ( !is_mapped(pfn_type[batch]) )
 22.1116 +                {
 22.1117 +                    /*
 22.1118 +                    ** not currently in pseudo-physical map -- set bit
 22.1119 +                    ** in to_fix since we must send this page in last_iter
 22.1120 +                    ** unless its sent sooner anyhow, or it never enters
 22.1121 +                    ** pseudo-physical map (e.g. for ballooned down doms)
 22.1122 +                    */
 22.1123 +                    set_bit(n, to_fix);
 22.1124 +                    continue;
 22.1125 +                }
 22.1126 +
 22.1127 +                if ( last_iter &&
 22.1128 +                     test_bit(n, to_fix) &&
 22.1129 +                     !test_bit(n, to_send) )
 22.1130 +                {
 22.1131 +                    needed_to_fix++;
 22.1132 +                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
 22.1133 +                            iter, n, pfn_type[batch]);
 22.1134 +                }
 22.1135 +                
 22.1136 +                clear_bit(n, to_fix);
 22.1137 +                
 22.1138 +                batch++;
 22.1139 +            }
 22.1140 +
 22.1141 +            if ( batch == 0 )
 22.1142 +                goto skip; /* vanishingly unlikely... */
 22.1143 +
 22.1144 +            region_base = xc_map_foreign_batch(
 22.1145 +                xc_handle, dom, PROT_READ, pfn_type, batch);
 22.1146 +            if ( region_base == NULL )
 22.1147 +            {
 22.1148 +                ERROR("map batch failed");
 22.1149 +                goto out;
 22.1150 +            }
 22.1151 +
 22.1152 +            if ( !hvm )
 22.1153 +            {
 22.1154 +                /* Get page types */
 22.1155 +                for ( j = 0; j < batch; j++ )
 22.1156 +                    ((uint32_t *)pfn_type)[j] = pfn_type[j];
 22.1157 +                if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
 22.1158 +                                           (uint32_t *)pfn_type) )
 22.1159 +                {
 22.1160 +                    ERROR("get_pfn_type_batch failed");
 22.1161 +                    goto out;
 22.1162 +                }
 22.1163 +                for ( j = batch-1; j >= 0; j-- )
 22.1164 +                    pfn_type[j] = ((uint32_t *)pfn_type)[j];
 22.1165 +
 22.1166 +                for ( j = 0; j < batch; j++ )
 22.1167 +                {
 22.1168 +                    
 22.1169 +                    if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
 22.1170 +                         XEN_DOMCTL_PFINFO_XTAB )
 22.1171 +                    {
 22.1172 +                        DPRINTF("type fail: page %i mfn %08lx\n", 
 22.1173 +                                j, pfn_type[j]);
 22.1174 +                        continue;
 22.1175 +                    }
 22.1176 +                    
 22.1177 +                    if ( debug )
 22.1178 +                        DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
 22.1179 +                                " sum= %08lx\n",
 22.1180 +                                iter,
 22.1181 +                                (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 22.1182 +                                pfn_batch[j],
 22.1183 +                                pfn_type[j],
 22.1184 +                                mfn_to_pfn(pfn_type[j] &
 22.1185 +                                           ~XEN_DOMCTL_PFINFO_LTAB_MASK),
 22.1186 +                                csum_page(region_base + (PAGE_SIZE*j)));
 22.1187 +                    
 22.1188 +                    /* canonicalise mfn->pfn */
 22.1189 +                    pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 22.1190 +                        pfn_batch[j];
 22.1191 +                }
 22.1192 +            }
 22.1193 +
 22.1194 +            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
 22.1195 +            {
 22.1196 +                ERROR("Error when writing to state file (2) (errno %d)",
 22.1197 +                      errno);
 22.1198 +                goto out;
 22.1199 +            }
 22.1200 +
 22.1201 +            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
 22.1202 +            {
 22.1203 +                ERROR("Error when writing to state file (3) (errno %d)",
 22.1204 +                      errno);
 22.1205 +                goto out;
 22.1206 +            }
 22.1207 +
 22.1208 +            /* entering this loop, pfn_type is now in pfns (Not mfns) */
 22.1209 +            for ( j = 0; j < batch; j++ )
 22.1210 +            {
 22.1211 +                unsigned long pfn, pagetype;
 22.1212 +                void *spage = (char *)region_base + (PAGE_SIZE*j);
 22.1213 +
 22.1214 +                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
 22.1215 +                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
 22.1216 +
 22.1217 +                /* write out pages in batch */
 22.1218 +                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
 22.1219 +                    continue;
 22.1220 +
 22.1221 +                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 22.1222 +
 22.1223 +                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
 22.1224 +                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
 22.1225 +                {
 22.1226 +                    /* We have a pagetable page: need to rewrite it. */
 22.1227 +                    race = 
 22.1228 +                        canonicalize_pagetable(pagetype, pfn, spage, page); 
 22.1229 +
 22.1230 +                    if ( race && !live )
 22.1231 +                    {
 22.1232 +                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
 22.1233 +                              pagetype);
 22.1234 +                        goto out;
 22.1235 +                    }
 22.1236 +
 22.1237 +                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
 22.1238 +                    {
 22.1239 +                        ERROR("Error when writing to state file (4)"
 22.1240 +                              " (errno %d)", errno);
 22.1241 +                        goto out;
 22.1242 +                    }
 22.1243 +                }
 22.1244 +                else
 22.1245 +                {
 22.1246 +                    /* We have a normal page: just write it directly. */
 22.1247 +                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
 22.1248 +                         PAGE_SIZE )
 22.1249 +                    {
 22.1250 +                        ERROR("Error when writing to state file (5)"
 22.1251 +                              " (errno %d)", errno);
 22.1252 +                        goto out;
 22.1253 +                    }
 22.1254 +                }
 22.1255 +            } /* end of the write out for this batch */
 22.1256 +
 22.1257 +            sent_this_iter += batch;
 22.1258 +
 22.1259 +            munmap(region_base, batch*PAGE_SIZE);
 22.1260 +
 22.1261 +        } /* end of this while loop for this iteration */
 22.1262 +
 22.1263 +      skip:
 22.1264 +
 22.1265 +        total_sent += sent_this_iter;
 22.1266 +
 22.1267 +        DPRINTF("\r %d: sent %d, skipped %d, ",
 22.1268 +                iter, sent_this_iter, skip_this_iter );
 22.1269 +
 22.1270 +        if ( last_iter )
 22.1271 +        {
 22.1272 +            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 22.1273 +
 22.1274 +            DPRINTF("Total pages sent= %ld (%.2fx)\n",
 22.1275 +                    total_sent, ((float)total_sent)/p2m_size );
 22.1276 +            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
 22.1277 +        }
 22.1278 +
 22.1279 +        if ( last_iter && debug )
 22.1280 +        {
 22.1281 +            int minusone = -1;
 22.1282 +            memset(to_send, 0xff, BITMAP_SIZE);
 22.1283 +            debug = 0;
 22.1284 +            DPRINTF("Entering debug resend-all mode\n");
 22.1285 +
 22.1286 +            /* send "-1" to put receiver into debug mode */
 22.1287 +            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
 22.1288 +            {
 22.1289 +                ERROR("Error when writing to state file (6) (errno %d)",
 22.1290 +                      errno);
 22.1291 +                goto out;
 22.1292 +            }
 22.1293 +
 22.1294 +            continue;
 22.1295 +        }
 22.1296 +
 22.1297 +        if ( last_iter )
 22.1298 +            break;
 22.1299 +
 22.1300 +        if ( live )
 22.1301 +        {
 22.1302 +            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
 22.1303 +                 (iter >= max_iters) ||
 22.1304 +                 (sent_this_iter+skip_this_iter < 50) ||
 22.1305 +                 (total_sent > p2m_size*max_factor) )
 22.1306 +            {
 22.1307 +                DPRINTF("Start last iteration\n");
 22.1308 +                last_iter = 1;
 22.1309 +
 22.1310 +                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
 22.1311 +                                       &ctxt) )
 22.1312 +                {
 22.1313 +                    ERROR("Domain appears not to have suspended");
 22.1314 +                    goto out;
 22.1315 +                }
 22.1316 +
 22.1317 +                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
 22.1318 +                        info.shared_info_frame,
 22.1319 +                        (unsigned long)ctxt.user_regs.eip,
 22.1320 +                        (unsigned long)ctxt.user_regs.edx);
 22.1321 +            }
 22.1322 +
 22.1323 +            if ( xc_shadow_control(xc_handle, dom, 
 22.1324 +                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
 22.1325 +                                   p2m_size, NULL, 0, &stats) != p2m_size )
 22.1326 +            {
 22.1327 +                ERROR("Error flushing shadow PT");
 22.1328 +                goto out;
 22.1329 +            }
 22.1330 +
 22.1331 +            if ( hvm ) 
 22.1332 +            {
 22.1333 +                /* Pull in the dirty bits from qemu-dm too */
 22.1334 +                if ( !last_iter )
 22.1335 +                {
 22.1336 +                    qemu_active = qemu_non_active;
 22.1337 +                    qemu_non_active = qemu_active ? 0 : 1;
 22.1338 +                    qemu_flip_buffer(dom, qemu_active);
 22.1339 +                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
 22.1340 +                    {
 22.1341 +                        to_send[j] |= qemu_bitmaps[qemu_non_active][j];
 22.1342 +                        qemu_bitmaps[qemu_non_active][j] = 0;
 22.1343 +                    }
 22.1344 +                }
 22.1345 +                else
 22.1346 +                {
 22.1347 +                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
 22.1348 +                        to_send[j] |= qemu_bitmaps[qemu_active][j];
 22.1349 +                }
 22.1350 +            }
 22.1351 +
 22.1352 +            sent_last_iter = sent_this_iter;
 22.1353 +
 22.1354 +            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
 22.1355 +
 22.1356 +        }
 22.1357 +    } /* end of infinite for loop */
 22.1358 +
 22.1359 +    DPRINTF("All memory is saved\n");
 22.1360 +
 22.1361 +    {
 22.1362 +        struct {
 22.1363 +            int minustwo;
 22.1364 +            int max_vcpu_id;
 22.1365 +            uint64_t vcpumap;
 22.1366 +        } chunk = { -2, info.max_vcpu_id };
 22.1367 +
 22.1368 +        if ( info.max_vcpu_id >= 64 )
 22.1369 +        {
 22.1370 +            ERROR("Too many VCPUS in guest!");
 22.1371 +            goto out;
 22.1372 +        }
 22.1373 +
 22.1374 +        for ( i = 1; i <= info.max_vcpu_id; i++ )
 22.1375 +        {
 22.1376 +            xc_vcpuinfo_t vinfo;
 22.1377 +            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
 22.1378 +                 vinfo.online )
 22.1379 +                vcpumap |= 1ULL << i;
 22.1380 +        }
 22.1381 +
 22.1382 +        chunk.vcpumap = vcpumap;
 22.1383 +        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
 22.1384 +        {
 22.1385 +            ERROR("Error when writing to state file (errno %d)", errno);
 22.1386 +            goto out;
 22.1387 +        }
 22.1388 +    }
 22.1389 +
 22.1390 +    /* Zero terminate */
 22.1391 +    i = 0;
 22.1392 +    if ( !write_exact(io_fd, &i, sizeof(int)) )
 22.1393 +    {
 22.1394 +        ERROR("Error when writing to state file (6') (errno %d)", errno);
 22.1395 +        goto out;
 22.1396 +    }
 22.1397 +
 22.1398 +    if ( hvm ) 
 22.1399 +    {
 22.1400 +        uint32_t rec_size;
 22.1401 +
 22.1402 +        /* Save magic-page locations. */
 22.1403 +        memset(magic_pfns, 0, sizeof(magic_pfns));
 22.1404 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
 22.1405 +                         (unsigned long *)&magic_pfns[0]);
 22.1406 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
 22.1407 +                         (unsigned long *)&magic_pfns[1]);
 22.1408 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
 22.1409 +                         (unsigned long *)&magic_pfns[2]);
 22.1410 +        if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
 22.1411 +        {
 22.1412 +            ERROR("Error when writing to state file (7)");
 22.1413 +            goto out;
 22.1414 +        }
 22.1415 +
 22.1416 +        /* Save vcpu contexts */
 22.1417 +
 22.1418 +        for ( i = 0; i <= info.max_vcpu_id; i++ )
 22.1419 +        {
 22.1420 +            if ( !(vcpumap & (1ULL << i)) )
 22.1421 +                continue;
 22.1422 +            
 22.1423 +            if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
 22.1424 +            {
 22.1425 +                ERROR("HVM:Could not get vcpu context");
 22.1426 +                goto out;
 22.1427 +            }
 22.1428 +            
 22.1429 +            DPRINTF("write vcpu %d context.\n", i); 
 22.1430 +            if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
 22.1431 +            {
 22.1432 +                ERROR("write vcpu context failed!\n");
 22.1433 +                goto out;
 22.1434 +            }
 22.1435 +        }
 22.1436 +
 22.1437 +        /* Get HVM context from Xen and save it too */
 22.1438 +        if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, 
 22.1439 +                                                  hvm_buf_size)) == -1 )
 22.1440 +        {
 22.1441 +            ERROR("HVM:Could not get hvm buffer");
 22.1442 +            goto out;
 22.1443 +        }
 22.1444 +        
 22.1445 +        if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
 22.1446 +        {
 22.1447 +            ERROR("error write hvm buffer size");
 22.1448 +            goto out;
 22.1449 +        }
 22.1450 +        
 22.1451 +        if ( !write_exact(io_fd, hvm_buf, rec_size) )
 22.1452 +        {
 22.1453 +            ERROR("write HVM info failed!\n");
 22.1454 +            goto out;
 22.1455 +        }
 22.1456 +        
 22.1457 +        /* HVM guests are done now */
 22.1458 +        rc = 0;
 22.1459 +        goto out;
 22.1460 +    }
 22.1461 +
 22.1462 +    /* PV guests only from now on */
 22.1463 +
 22.1464 +    /* Send through a list of all the PFNs that were not in map at the close */
 22.1465 +    {
 22.1466 +        unsigned int i,j;
 22.1467 +        unsigned long pfntab[1024];
 22.1468 +
 22.1469 +        for ( i = 0, j = 0; i < p2m_size; i++ )
 22.1470 +        {
 22.1471 +            if ( !is_mapped(live_p2m[i]) )
 22.1472 +                j++;
 22.1473 +        }
 22.1474 +
 22.1475 +        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
 22.1476 +        {
 22.1477 +            ERROR("Error when writing to state file (6a) (errno %d)", errno);
 22.1478 +            goto out;
 22.1479 +        }
 22.1480 +
 22.1481 +        for ( i = 0, j = 0; i < p2m_size; )
 22.1482 +        {
 22.1483 +            if ( !is_mapped(live_p2m[i]) )
 22.1484 +                pfntab[j++] = i;
 22.1485 +
 22.1486 +            i++;
 22.1487 +            if ( (j == 1024) || (i == p2m_size) )
 22.1488 +            {
 22.1489 +                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
 22.1490 +                {
 22.1491 +                    ERROR("Error when writing to state file (6b) (errno %d)",
 22.1492 +                          errno);
 22.1493 +                    goto out;
 22.1494 +                }
 22.1495 +                j = 0;
 22.1496 +            }
 22.1497 +        }
 22.1498 +    }
 22.1499 +
 22.1500 +    /* Canonicalise the suspend-record frame number. */
 22.1501 +    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
 22.1502 +    {
 22.1503 +        ERROR("Suspend record is not in range of pseudophys map");
 22.1504 +        goto out;
 22.1505 +    }
 22.1506 +
 22.1507 +    for ( i = 0; i <= info.max_vcpu_id; i++ )
 22.1508 +    {
 22.1509 +        if ( !(vcpumap & (1ULL << i)) )
 22.1510 +            continue;
 22.1511 +
 22.1512 +        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
 22.1513 +        {
 22.1514 +            ERROR("No context for VCPU%d", i);
 22.1515 +            goto out;
 22.1516 +        }
 22.1517 +
 22.1518 +        /* Canonicalise each GDT frame number. */
 22.1519 +        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
 22.1520 +        {
 22.1521 +            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
 22.1522 +            {
 22.1523 +                ERROR("GDT frame is not in range of pseudophys map");
 22.1524 +                goto out;
 22.1525 +            }
 22.1526 +        }
 22.1527 +
 22.1528 +        /* Canonicalise the page table base pointer. */
 22.1529 +        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
 22.1530 +        {
 22.1531 +            ERROR("PT base is not in range of pseudophys map");
 22.1532 +            goto out;
 22.1533 +        }
 22.1534 +        ctxt.ctrlreg[3] = 
 22.1535 +            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 22.1536 +
 22.1537 +        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
 22.1538 +        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
 22.1539 +        {
 22.1540 +            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
 22.1541 +            {
 22.1542 +                ERROR("PT base is not in range of pseudophys map");
 22.1543 +                goto out;
 22.1544 +            }
 22.1545 +            /* Least-significant bit means 'valid PFN'. */
 22.1546 +            ctxt.ctrlreg[1] = 1 |
 22.1547 +                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
 22.1548 +        }
 22.1549 +
 22.1550 +        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
 22.1551 +        {
 22.1552 +            ERROR("Error when writing to state file (1) (errno %d)", errno);
 22.1553 +            goto out;
 22.1554 +        }
 22.1555 +    }
 22.1556 +
 22.1557 +    /*
 22.1558 +     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
 22.1559 +     */
 22.1560 +    memcpy(page, live_shinfo, PAGE_SIZE);
 22.1561 +    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
 22.1562 +    if ( !write_exact(io_fd, page, PAGE_SIZE) )
 22.1563 +    {
 22.1564 +        ERROR("Error when writing to state file (1) (errno %d)", errno);
 22.1565 +        goto out;
 22.1566 +    }
 22.1567 +
 22.1568 +    /* Success! */
 22.1569 +    rc = 0;
 22.1570 +
 22.1571 + out:
 22.1572 +
 22.1573 +    if ( live )
 22.1574 +    {
 22.1575 +        if ( xc_shadow_control(xc_handle, dom, 
 22.1576 +                               XEN_DOMCTL_SHADOW_OP_OFF,
 22.1577 +                               NULL, 0, NULL, 0, NULL) < 0 )
 22.1578 +            DPRINTF("Warning - couldn't disable shadow mode");
 22.1579 +    }
 22.1580 +
 22.1581 +    /* Flush last write and discard cache for file. */
 22.1582 +    discard_file_cache(io_fd, 1 /* flush */);
 22.1583 +
 22.1584 +    if ( live_shinfo )
 22.1585 +        munmap(live_shinfo, PAGE_SIZE);
 22.1586 +
 22.1587 +    if ( live_p2m )
 22.1588 +        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 22.1589 +
 22.1590 +    if ( live_m2p )
 22.1591 +        munmap(live_m2p, M2P_SIZE(max_mfn));
 22.1592 +
 22.1593 +    free(pfn_type);
 22.1594 +    free(pfn_batch);
 22.1595 +    free(to_send);
 22.1596 +    free(to_fix);
 22.1597 +    free(to_skip);
 22.1598 +
 22.1599 +    DPRINTF("Save exit rc=%d\n",rc);
 22.1600 +
 22.1601 +    return !!rc;
 22.1602 +}
 22.1603 +
 22.1604 +/*
 22.1605 + * Local variables:
 22.1606 + * mode: C
 22.1607 + * c-set-style: "BSD"
 22.1608 + * c-basic-offset: 4
 22.1609 + * tab-width: 4
 22.1610 + * indent-tabs-mode: nil
 22.1611 + * End:
 22.1612 + */
    23.1 --- a/tools/libxc/xc_hvm_build.c	Thu Apr 12 10:26:42 2007 -0600
    23.2 +++ b/tools/libxc/xc_hvm_build.c	Thu Apr 12 10:30:12 2007 -0600
    23.3 @@ -29,47 +29,6 @@ typedef union
    23.4      vcpu_guest_context_t c;
    23.5  } vcpu_guest_context_either_t;
    23.6  
    23.7 -
    23.8 -int xc_set_hvm_param(
    23.9 -    int handle, domid_t dom, int param, unsigned long value)
   23.10 -{
   23.11 -    DECLARE_HYPERCALL;
   23.12 -    xen_hvm_param_t arg;
   23.13 -    int rc;
   23.14 -
   23.15 -    hypercall.op     = __HYPERVISOR_hvm_op;
   23.16 -    hypercall.arg[0] = HVMOP_set_param;
   23.17 -    hypercall.arg[1] = (unsigned long)&arg;
   23.18 -    arg.domid = dom;
   23.19 -    arg.index = param;
   23.20 -    arg.value = value;
   23.21 -    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   23.22 -        return -1;
   23.23 -    rc = do_xen_hypercall(handle, &hypercall);
   23.24 -    unlock_pages(&arg, sizeof(arg));
   23.25 -    return rc;
   23.26 -}
   23.27 -
   23.28 -int xc_get_hvm_param(
   23.29 -    int handle, domid_t dom, int param, unsigned long *value)
   23.30 -{
   23.31 -    DECLARE_HYPERCALL;
   23.32 -    xen_hvm_param_t arg;
   23.33 -    int rc;
   23.34 -
   23.35 -    hypercall.op     = __HYPERVISOR_hvm_op;
   23.36 -    hypercall.arg[0] = HVMOP_get_param;
   23.37 -    hypercall.arg[1] = (unsigned long)&arg;
   23.38 -    arg.domid = dom;
   23.39 -    arg.index = param;
   23.40 -    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   23.41 -        return -1;
   23.42 -    rc = do_xen_hypercall(handle, &hypercall);
   23.43 -    unlock_pages(&arg, sizeof(arg));
   23.44 -    *value = arg.value;
   23.45 -    return rc;
   23.46 -}
   23.47 -
   23.48  static void build_e820map(void *e820_page, unsigned long long mem_size)
   23.49  {
   23.50      struct e820entry *e820entry =
    24.1 --- a/tools/libxc/xc_hvm_save.c	Thu Apr 12 10:26:42 2007 -0600
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,755 +0,0 @@
    24.4 -/******************************************************************************
    24.5 - * xc_hvm_save.c
    24.6 - *
    24.7 - * Save the state of a running HVM guest.
    24.8 - *
    24.9 - * Copyright (c) 2003, K A Fraser.
   24.10 - * Copyright (c) 2006 Intel Corperation
   24.11 - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@intel.com>
   24.12 - *
   24.13 - * This program is free software; you can redistribute it and/or modify it
   24.14 - * under the terms and conditions of the GNU General Public License,
   24.15 - * version 2, as published by the Free Software Foundation.
   24.16 - *
   24.17 - * This program is distributed in the hope it will be useful, but WITHOUT
   24.18 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   24.19 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   24.20 - * more details.
   24.21 - *
   24.22 - * You should have received a copy of the GNU General Public License along with
   24.23 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   24.24 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   24.25 - *
   24.26 - */
   24.27 -
   24.28 -#include <inttypes.h>
   24.29 -#include <time.h>
   24.30 -#include <stdlib.h>
   24.31 -#include <unistd.h>
   24.32 -#include <sys/time.h>
   24.33 -
   24.34 -#include "xc_private.h"
   24.35 -#include "xg_private.h"
   24.36 -#include "xg_save_restore.h"
   24.37 -
   24.38 -#include <xen/hvm/e820.h>
   24.39 -#include <xen/hvm/params.h>
   24.40 -
   24.41 -/*
   24.42 -** Default values for important tuning parameters. Can override by passing
   24.43 -** non-zero replacement values to xc_hvm_save().
   24.44 -**
   24.45 -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   24.46 -**
   24.47 -*/
   24.48 -#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   24.49 -#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
   24.50 -
   24.51 -/* Shared-memory bitmaps for getting log-dirty bits from qemu */
   24.52 -static unsigned long *qemu_bitmaps[2];
   24.53 -static int qemu_active;
   24.54 -static int qemu_non_active;
   24.55 -
   24.56 -/*
   24.57 -** During (live) save/migrate, we maintain a number of bitmaps to track
   24.58 -** which pages we have to send, to fixup, and to skip.
   24.59 -*/
   24.60 -
   24.61 -#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   24.62 -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
   24.63 -#define BITMAP_SIZE   (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long))
   24.64 -
   24.65 -#define BITMAP_ENTRY(_nr,_bmap) \
   24.66 -   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   24.67 -
   24.68 -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   24.69 -
   24.70 -static inline int test_bit (int nr, volatile void * addr)
   24.71 -{
   24.72 -    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
   24.73 -}
   24.74 -
   24.75 -static inline void clear_bit (int nr, volatile void * addr)
   24.76 -{
   24.77 -    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
   24.78 -}
   24.79 -
   24.80 -static inline int permute( int i, int nr, int order_nr  )
   24.81 -{
   24.82 -    /* Need a simple permutation function so that we scan pages in a
   24.83 -       pseudo random order, enabling us to get a better estimate of
   24.84 -       the domain's page dirtying rate as we go (there are often
   24.85 -       contiguous ranges of pfns that have similar behaviour, and we
   24.86 -       want to mix them up. */
   24.87 -
   24.88 -    /* e.g. nr->oder 15->4 16->4 17->5 */
   24.89 -    /* 512MB domain, 128k pages, order 17 */
   24.90 -
   24.91 -    /*
   24.92 -      QPONMLKJIHGFEDCBA
   24.93 -             QPONMLKJIH
   24.94 -      GFEDCBA
   24.95 -     */
   24.96 -
   24.97 -    /*
   24.98 -      QPONMLKJIHGFEDCBA
   24.99 -                  EDCBA
  24.100 -             QPONM
  24.101 -      LKJIHGF
  24.102 -      */
  24.103 -
  24.104 -    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  24.105 -    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  24.106 -
  24.107 -    return i;
  24.108 -}
  24.109 -
  24.110 -
  24.111 -static uint64_t tv_to_us(struct timeval *new)
  24.112 -{
  24.113 -    return (new->tv_sec * 1000000) + new->tv_usec;
  24.114 -}
  24.115 -
  24.116 -static uint64_t llgettimeofday(void)
  24.117 -{
  24.118 -    struct timeval now;
  24.119 -    gettimeofday(&now, NULL);
  24.120 -    return tv_to_us(&now);
  24.121 -}
  24.122 -
  24.123 -static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  24.124 -{
  24.125 -    return (((new->tv_sec - old->tv_sec)*1000000) +
  24.126 -            (new->tv_usec - old->tv_usec));
  24.127 -}
  24.128 -
  24.129 -
  24.130 -#define RATE_IS_MAX() (0)
  24.131 -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
  24.132 -#define initialize_mbit_rate()
  24.133 -
  24.134 -static inline ssize_t write_exact(int fd, void *buf, size_t count)
  24.135 -{
  24.136 -    return (write(fd, buf, count) == count);
  24.137 -}
  24.138 -
  24.139 -static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  24.140 -                       xc_shadow_op_stats_t *stats, int print)
  24.141 -{
  24.142 -    static struct timeval wall_last;
  24.143 -    static long long      d0_cpu_last;
  24.144 -    static long long      d1_cpu_last;
  24.145 -
  24.146 -    struct timeval        wall_now;
  24.147 -    long long             wall_delta;
  24.148 -    long long             d0_cpu_now, d0_cpu_delta;
  24.149 -    long long             d1_cpu_now, d1_cpu_delta;
  24.150 -
  24.151 -    gettimeofday(&wall_now, NULL);
  24.152 -
  24.153 -    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
  24.154 -    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
  24.155 -
  24.156 -    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  24.157 -        DPRINTF("ARRHHH!!\n");
  24.158 -
  24.159 -    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  24.160 -    if ( wall_delta == 0 )
  24.161 -        wall_delta = 1;
  24.162 -
  24.163 -    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
  24.164 -    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
  24.165 -
  24.166 -    if ( print )
  24.167 -        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  24.168 -                "dirtied %dMb/s %" PRId32 " pages\n",
  24.169 -                wall_delta,
  24.170 -                (int)((d0_cpu_delta*100)/wall_delta),
  24.171 -                (int)((d1_cpu_delta*100)/wall_delta),
  24.172 -                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  24.173 -                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  24.174 -                stats->dirty_count);
  24.175 -
  24.176 -    d0_cpu_last = d0_cpu_now;
  24.177 -    d1_cpu_last = d1_cpu_now;
  24.178 -    wall_last   = wall_now;
  24.179 -
  24.180 -    return 0;
  24.181 -}
  24.182 -
  24.183 -static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size,
  24.184 -                          unsigned long *arr, int runs)
  24.185 -{
  24.186 -    long long start, now;
  24.187 -    xc_shadow_op_stats_t stats;
  24.188 -    int j;
  24.189 -
  24.190 -    start = llgettimeofday();
  24.191 -
  24.192 -    for ( j = 0; j < runs; j++ )
  24.193 -    {
  24.194 -        int i;
  24.195 -
  24.196 -        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  24.197 -                          arr, pfn_array_size, NULL, 0, NULL);
  24.198 -        DPRINTF("#Flush\n");
  24.199 -        for ( i = 0; i < 40; i++ )
  24.200 -        {
  24.201 -            usleep(50000);
  24.202 -            now = llgettimeofday();
  24.203 -            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  24.204 -                              NULL, 0, NULL, 0, &stats);
  24.205 -            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  24.206 -                    ((now-start)+500)/1000,
  24.207 -                    stats.fault_count, stats.dirty_count);
  24.208 -        }
  24.209 -    }
  24.210 -
  24.211 -    return -1;
  24.212 -}
  24.213 -
  24.214 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  24.215 -                             int dom, xc_dominfo_t *info,
  24.216 -                             vcpu_guest_context_t *ctxt)
  24.217 -{
  24.218 -    int i = 0;
  24.219 -
  24.220 -    if ( !(*suspend)(dom) )
  24.221 -    {
  24.222 -        ERROR("Suspend request failed");
  24.223 -        return -1;
  24.224 -    }
  24.225 -
  24.226 - retry:
  24.227 -
  24.228 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  24.229 -    {
  24.230 -        ERROR("Could not get domain info");
  24.231 -        return -1;
  24.232 -    }
  24.233 -
  24.234 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
  24.235 -        ERROR("Could not get vcpu context");
  24.236 -
  24.237 -    if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) )
  24.238 -        return 0; /* success */
  24.239 -
  24.240 -    if ( info->paused )
  24.241 -    {
  24.242 -        /* Try unpausing domain, wait, and retest. */
  24.243 -        xc_domain_unpause( xc_handle, dom );
  24.244 -        ERROR("Domain was paused. Wait and re-test.");
  24.245 -        usleep(10000);  /* 10ms */
  24.246 -        goto retry;
  24.247 -    }
  24.248 -
  24.249 -    if ( ++i < 100 )
  24.250 -    {
  24.251 -        ERROR("Retry suspend domain.");
  24.252 -        usleep(10000); /* 10ms */
  24.253 -        goto retry;
  24.254 -    }
  24.255 -
  24.256 -    ERROR("Unable to suspend domain.");
  24.257 -
  24.258 -    return -1;
  24.259 -}
  24.260 -
  24.261 -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  24.262 -                uint32_t max_factor, uint32_t flags, int (*suspend)(int),
  24.263 -                void *(*init_qemu_maps)(int, unsigned), 
  24.264 -                void (*qemu_flip_buffer)(int, int))
  24.265 -{
  24.266 -    xc_dominfo_t info;
  24.267 -
  24.268 -    int rc = 1, i, j, last_iter, iter = 0;
  24.269 -    int live  = !!(flags & XCFLAGS_LIVE);
  24.270 -    int debug = !!(flags & XCFLAGS_DEBUG);
  24.271 -    int sent_last_iter, skip_this_iter;
  24.272 -
  24.273 -    /* The highest guest-physical frame number used by the current guest */
  24.274 -    unsigned long max_pfn;
  24.275 -
  24.276 -    /* The size of an array big enough to contain all guest pfns */
  24.277 -    unsigned long pfn_array_size;
  24.278 -
  24.279 -    /* Magic frames: ioreqs and xenstore comms. */
  24.280 -    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
  24.281 -
  24.282 -    /* A copy of the CPU context of the guest. */
  24.283 -    vcpu_guest_context_t ctxt;
  24.284 -
  24.285 -    /* A table containg the PFNs (/not/ MFN!) to map. */
  24.286 -    xen_pfn_t *pfn_batch = NULL;
  24.287 -
  24.288 -    /* A copy of hvm domain context buffer*/
  24.289 -    uint32_t hvm_buf_size;
  24.290 -    uint8_t *hvm_buf = NULL;
  24.291 -
  24.292 -    /* base of the region in which domain memory is mapped */
  24.293 -    unsigned char *region_base = NULL;
  24.294 -
  24.295 -    uint32_t rec_size, nr_vcpus;
  24.296 -
  24.297 -    /* power of 2 order of pfn_array_size */
  24.298 -    int order_nr;
  24.299 -
  24.300 -    /* bitmap of pages:
  24.301 -       - that should be sent this iteration (unless later marked as skip);
  24.302 -       - to skip this iteration because already dirty; */
  24.303 -    unsigned long *to_send = NULL, *to_skip = NULL;
  24.304 -
  24.305 -    xc_shadow_op_stats_t stats;
  24.306 -
  24.307 -    unsigned long total_sent = 0;
  24.308 -
  24.309 -    uint64_t vcpumap = 1ULL;
  24.310 -
  24.311 -    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
  24.312 -            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
  24.313 -            live, debug);
  24.314 -    
  24.315 -    /* If no explicit control parameters given, use defaults */
  24.316 -    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  24.317 -    max_factor = max_factor ? : DEF_MAX_FACTOR;
  24.318 -
  24.319 -    initialize_mbit_rate();
  24.320 -
  24.321 -    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  24.322 -    {
  24.323 -        ERROR("HVM: Could not get domain info");
  24.324 -        return 1;
  24.325 -    }
  24.326 -    nr_vcpus = info.nr_online_vcpus;
  24.327 -
  24.328 -    if ( mlock(&ctxt, sizeof(ctxt)) )
  24.329 -    {
  24.330 -        ERROR("HVM: Unable to mlock ctxt");
  24.331 -        return 1;
  24.332 -    }
  24.333 -
  24.334 -    /* Only have to worry about vcpu 0 even for SMP */
  24.335 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  24.336 -    {
  24.337 -        ERROR("HVM: Could not get vcpu context");
  24.338 -        goto out;
  24.339 -    }
  24.340 -
  24.341 -    DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n",
  24.342 -            info.max_memkb, info.nr_pages); 
  24.343 -
  24.344 -    if ( live )
  24.345 -    {
  24.346 -        /* Live suspend. Enable log-dirty mode. */
  24.347 -        if ( xc_shadow_control(xc_handle, dom,
  24.348 -                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  24.349 -                               NULL, 0, NULL, 0, NULL) < 0 )
  24.350 -        {
  24.351 -            ERROR("Couldn't enable shadow mode");
  24.352 -            goto out;
  24.353 -        }
  24.354 -    }
  24.355 -    else
  24.356 -    {
  24.357 -        /* This is a non-live suspend. Suspend the domain .*/
  24.358 -        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
  24.359 -        {
  24.360 -            ERROR("HVM Domain appears not to have suspended");
  24.361 -            goto out;
  24.362 -        }
  24.363 -    }
  24.364 -
  24.365 -    last_iter = !live;
  24.366 -
  24.367 -    max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
  24.368 -
  24.369 -    DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, "
  24.370 -            "max_memkb=0x%lx, live=%d.\n",
  24.371 -            max_pfn, info.max_memkb, live);
  24.372 -
  24.373 -    /* Size of any array that covers 0 ... max_pfn */
  24.374 -    pfn_array_size = max_pfn + 1;
  24.375 -    if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
  24.376 -    {
  24.377 -        ERROR("Error when writing to state file (1)");
  24.378 -        goto out;
  24.379 -    }
  24.380 -
  24.381 -    /* pretend we sent all the pages last iteration */
  24.382 -    sent_last_iter = pfn_array_size;
  24.383 -
  24.384 -    /* calculate the power of 2 order of pfn_array_size, e.g.
  24.385 -       15->4 16->4 17->5 */
  24.386 -    for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  24.387 -        continue;
  24.388 -
  24.389 -    /* Setup to_send / to_fix and to_skip bitmaps */
  24.390 -    to_send = malloc(BITMAP_SIZE);
  24.391 -    to_skip = malloc(BITMAP_SIZE);
  24.392 -
  24.393 -    if ( live )
  24.394 -    {
  24.395 -        /* Get qemu-dm logging dirty pages too */
  24.396 -        void *seg = init_qemu_maps(dom, BITMAP_SIZE);
  24.397 -        qemu_bitmaps[0] = seg;
  24.398 -        qemu_bitmaps[1] = seg + BITMAP_SIZE;
  24.399 -        qemu_active = 0;
  24.400 -        qemu_non_active = 1;
  24.401 -    }
  24.402 -
  24.403 -    hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
  24.404 -    if ( hvm_buf_size == -1 )
  24.405 -    {
  24.406 -        ERROR("Couldn't get HVM context size from Xen");
  24.407 -        goto out;
  24.408 -    }
  24.409 -    hvm_buf = malloc(hvm_buf_size);
  24.410 -
  24.411 -    if ( !to_send || !to_skip || !hvm_buf )
  24.412 -    {
  24.413 -        ERROR("Couldn't allocate memory");
  24.414 -        goto out;
  24.415 -    }
  24.416 -
  24.417 -    memset(to_send, 0xff, BITMAP_SIZE);
  24.418 -
  24.419 -    if ( lock_pages(to_send, BITMAP_SIZE) )
  24.420 -    {
  24.421 -        ERROR("Unable to lock to_send");
  24.422 -        return 1;
  24.423 -    }
  24.424 -
  24.425 -    /* (to fix is local only) */
  24.426 -    if ( lock_pages(to_skip, BITMAP_SIZE) )
  24.427 -    {
  24.428 -        ERROR("Unable to lock to_skip");
  24.429 -        return 1;
  24.430 -    }
  24.431 -
  24.432 -    analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0);
  24.433 -
  24.434 -    /* We want zeroed memory so use calloc rather than malloc. */
  24.435 -    pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  24.436 -    if ( pfn_batch == NULL )
  24.437 -    {
  24.438 -        ERROR("failed to alloc memory for pfn_batch array");
  24.439 -        errno = ENOMEM;
  24.440 -        goto out;
  24.441 -    }
  24.442 -
  24.443 -    for ( ; ; )
  24.444 -    {
  24.445 -        unsigned int prev_pc, sent_this_iter, N, batch;
  24.446 -
  24.447 -        iter++;
  24.448 -        sent_this_iter = 0;
  24.449 -        skip_this_iter = 0;
  24.450 -        prev_pc = 0;
  24.451 -        N=0;
  24.452 -
  24.453 -        DPRINTF("Saving memory pages: iter %d   0%%", iter);
  24.454 -
  24.455 -        while ( N < pfn_array_size )
  24.456 -        {
  24.457 -            unsigned int this_pc = (N * 100) / pfn_array_size;
  24.458 -            int rc;
  24.459 -
  24.460 -            if ( (this_pc - prev_pc) >= 5 )
  24.461 -            {
  24.462 -                DPRINTF("\b\b\b\b%3d%%", this_pc);
  24.463 -                prev_pc = this_pc;
  24.464 -            }
  24.465 -
  24.466 -            if ( !last_iter )
  24.467 -            {
  24.468 -                /* Slightly wasteful to peek the whole array evey time,
  24.469 -                   but this is fast enough for the moment. */
  24.470 -                rc = xc_shadow_control(
  24.471 -                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
  24.472 -                    pfn_array_size, NULL, 0, NULL);
  24.473 -                if ( rc != pfn_array_size )
  24.474 -                {
  24.475 -                    ERROR("Error peeking shadow bitmap");
  24.476 -                    goto out;
  24.477 -                }
  24.478 -            }
  24.479 -
  24.480 -            /* load pfn_batch[] with the mfn of all the pages we're doing in
  24.481 -               this batch. */
  24.482 -            for ( batch = 0;
  24.483 -                  (batch < MAX_BATCH_SIZE) && (N < pfn_array_size);
  24.484 -                  N++ )
  24.485 -            {
  24.486 -                int n = permute(N, pfn_array_size, order_nr);
  24.487 -
  24.488 -                if ( 0 && debug )
  24.489 -                    DPRINTF("%d pfn= %08lx %d \n",
  24.490 -                            iter, (unsigned long)n, test_bit(n, to_send));
  24.491 -
  24.492 -                if ( !last_iter &&
  24.493 -                     test_bit(n, to_send) &&
  24.494 -                     test_bit(n, to_skip) )
  24.495 -                    skip_this_iter++; /* stats keeping */
  24.496 -
  24.497 -                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
  24.498 -                       (test_bit(n, to_send) && last_iter)) )
  24.499 -                    continue;
  24.500 -
  24.501 -                /* Skip PFNs that aren't really there */
  24.502 -                if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */
  24.503 -                     || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) &&
  24.504 -                         n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ )
  24.505 -                    continue;
  24.506 -
  24.507 -                /*
  24.508 -                ** we get here if:
  24.509 -                **  1. page is marked to_send & hasn't already been re-dirtied
  24.510 -                **  2. (ignore to_skip in last iteration)
  24.511 -                */
  24.512 -
  24.513 -                pfn_batch[batch] = n;
  24.514 -
  24.515 -                batch++;
  24.516 -            }
  24.517 -
  24.518 -            if ( batch == 0 )
  24.519 -                goto skip; /* vanishingly unlikely... */
  24.520 -
  24.521 -            region_base = xc_map_foreign_batch(
  24.522 -                xc_handle, dom, PROT_READ, pfn_batch, batch);
  24.523 -            if ( region_base == 0 )
  24.524 -            {
  24.525 -                ERROR("map batch failed");
  24.526 -                goto out;
  24.527 -            }
  24.528 -
  24.529 -            /* write num of pfns */
  24.530 -            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
  24.531 -            {
  24.532 -                ERROR("Error when writing to state file (2)");
  24.533 -                goto out;
  24.534 -            }
  24.535 -
  24.536 -            /* write all the pfns */
  24.537 -            if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) )
  24.538 -            {
  24.539 -                ERROR("Error when writing to state file (3)");
  24.540 -                goto out;
  24.541 -            }
  24.542 -
  24.543 -            for ( j = 0; j < batch; j++ )
  24.544 -            {
  24.545 -                if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK )
  24.546 -                    continue;
  24.547 -                if ( ratewrite(io_fd, region_base + j*PAGE_SIZE,
  24.548 -                               PAGE_SIZE) != PAGE_SIZE )
  24.549 -                {
  24.550 -                    ERROR("ERROR when writing to state file (4)");
  24.551 -                    goto out;
  24.552 -                }
  24.553 -            }
  24.554 -
  24.555 -            sent_this_iter += batch;
  24.556 -
  24.557 -            munmap(region_base, batch*PAGE_SIZE);
  24.558 -
  24.559 -        } /* end of this while loop for this iteration */
  24.560 -
  24.561 -      skip:
  24.562 -
  24.563 -        total_sent += sent_this_iter;
  24.564 -
  24.565 -        DPRINTF("\r %d: sent %d, skipped %d, ",
  24.566 -                iter, sent_this_iter, skip_this_iter );
  24.567 -
  24.568 -        if ( last_iter )
  24.569 -        {
  24.570 -            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
  24.571 -            DPRINTF("Total pages sent= %ld (%.2fx)\n",
  24.572 -                    total_sent, ((float)total_sent)/pfn_array_size );
  24.573 -        }
  24.574 -
  24.575 -        if ( last_iter && debug )
  24.576 -        {
  24.577 -            int minusone = -1;
  24.578 -            memset(to_send, 0xff, BITMAP_SIZE);
  24.579 -            debug = 0;
  24.580 -            DPRINTF("Entering debug resend-all mode\n");
  24.581 -
  24.582 -            /* send "-1" to put receiver into debug mode */
  24.583 -            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
  24.584 -            {
  24.585 -                ERROR("Error when writing to state file (6)");
  24.586 -                goto out;
  24.587 -            }
  24.588 -
  24.589 -            continue;
  24.590 -        }
  24.591 -
  24.592 -        if ( last_iter )
  24.593 -            break;
  24.594 -
  24.595 -        if ( live )
  24.596 -        {
  24.597 -            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
  24.598 -                 (iter >= max_iters) ||
  24.599 -                 (sent_this_iter+skip_this_iter < 50) ||
  24.600 -                 (total_sent > pfn_array_size*max_factor) )
  24.601 -            {
  24.602 -                DPRINTF("Start last iteration for HVM domain\n");
  24.603 -                last_iter = 1;
  24.604 -
  24.605 -                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
  24.606 -                                       &ctxt))
  24.607 -                {
  24.608 -                    ERROR("Domain appears not to have suspended");
  24.609 -                    goto out;
  24.610 -                }
  24.611 -
  24.612 -                DPRINTF("SUSPEND eip %08lx edx %08lx\n",
  24.613 -                        (unsigned long)ctxt.user_regs.eip,
  24.614 -                        (unsigned long)ctxt.user_regs.edx);
  24.615 -            }
  24.616 -
  24.617 -            if ( xc_shadow_control(xc_handle, dom, 
  24.618 -                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
  24.619 -                                   pfn_array_size, NULL, 
  24.620 -                                   0, &stats) != pfn_array_size )
  24.621 -            {
  24.622 -                ERROR("Error flushing shadow PT");
  24.623 -                goto out;
  24.624 -            }
  24.625 -
  24.626 -            /* Pull in the dirty bits from qemu too */
  24.627 -            if ( !last_iter )
  24.628 -            {
  24.629 -                qemu_active = qemu_non_active;
  24.630 -                qemu_non_active = qemu_active ? 0 : 1;
  24.631 -                qemu_flip_buffer(dom, qemu_active);
  24.632 -                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
  24.633 -                {
  24.634 -                    to_send[j] |= qemu_bitmaps[qemu_non_active][j];
  24.635 -                    qemu_bitmaps[qemu_non_active][j] = 0;
  24.636 -                }
  24.637 -            }
  24.638 -            else
  24.639 -            {
  24.640 -                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
  24.641 -                    to_send[j] |= qemu_bitmaps[qemu_active][j];
  24.642 -            }
  24.643 -
  24.644 -            sent_last_iter = sent_this_iter;
  24.645 -
  24.646 -            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
  24.647 -        }
  24.648 -    } /* end of while 1 */
  24.649 -
  24.650 -
  24.651 -    DPRINTF("All HVM memory is saved\n");
  24.652 -
  24.653 -    {
  24.654 -        struct {
  24.655 -            int minustwo;
  24.656 -            int max_vcpu_id;
  24.657 -            uint64_t vcpumap;
  24.658 -        } chunk = { -2, info.max_vcpu_id };
  24.659 -
  24.660 -        if (info.max_vcpu_id >= 64) {
  24.661 -            ERROR("Too many VCPUS in guest!");
  24.662 -            goto out;
  24.663 -        }
  24.664 -
  24.665 -        for (i = 1; i <= info.max_vcpu_id; i++) {
  24.666 -            xc_vcpuinfo_t vinfo;
  24.667 -            if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
  24.668 -                vinfo.online)
  24.669 -                vcpumap |= 1ULL << i;
  24.670 -        }
  24.671 -
  24.672 -        chunk.vcpumap = vcpumap;
  24.673 -        if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
  24.674 -            ERROR("Error when writing to state file (errno %d)", errno);
  24.675 -            goto out;
  24.676 -        }
  24.677 -    }
  24.678 -
  24.679 -    /* Zero terminate */
  24.680 -    i = 0;
  24.681 -    if ( !write_exact(io_fd, &i, sizeof(int)) )
  24.682 -    {
  24.683 -        ERROR("Error when writing to state file (6)");
  24.684 -        goto out;
  24.685 -    }
  24.686 -
  24.687 -    /* Save magic-page locations. */
  24.688 -    memset(magic_pfns, 0, sizeof(magic_pfns));
  24.689 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
  24.690 -                     (unsigned long *)&magic_pfns[0]);
  24.691 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
  24.692 -                     (unsigned long *)&magic_pfns[1]);
  24.693 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
  24.694 -                     (unsigned long *)&magic_pfns[2]);
  24.695 -    if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
  24.696 -    {
  24.697 -        ERROR("Error when writing to state file (7)");
  24.698 -        goto out;
  24.699 -    }
  24.700 -
  24.701 -    /* save vcpu/vmcs contexts */
  24.702 -    for ( i = 0; i < nr_vcpus; i++ )
  24.703 -    {
  24.704 -        if ( !(vcpumap & (1ULL << i)) )
  24.705 -            continue;
  24.706 -
  24.707 -        if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
  24.708 -        {
  24.709 -            ERROR("HVM:Could not get vcpu context");
  24.710 -            goto out;
  24.711 -        }
  24.712 -
  24.713 -        DPRINTF("write vcpu %d context.\n", i); 
  24.714 -        if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
  24.715 -        {
  24.716 -            ERROR("write vcpu context failed!\n");
  24.717 -            goto out;
  24.718 -        }
  24.719 -    }
  24.720 -
  24.721 -    if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, 
  24.722 -                                              hvm_buf_size)) == -1 )
  24.723 -    {
  24.724 -        ERROR("HVM:Could not get hvm buffer");
  24.725 -        goto out;
  24.726 -    }
  24.727 -
  24.728 -    if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
  24.729 -    {
  24.730 -        ERROR("error write hvm buffer size");
  24.731 -        goto out;
  24.732 -    }
  24.733 -
  24.734 -    if ( !write_exact(io_fd, hvm_buf, rec_size) )
  24.735 -    {
  24.736 -        ERROR("write HVM info failed!\n");
  24.737 -        goto out;
  24.738 -    }
  24.739 -
  24.740 -    /* Success! */
  24.741 -    rc = 0;
  24.742 -
  24.743 - out:
  24.744 -
  24.745 -    if ( live )
  24.746 -    {
  24.747 -        if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
  24.748 -                               NULL, 0, NULL, 0, NULL) < 0 )
  24.749 -            DPRINTF("Warning - couldn't disable shadow mode");
  24.750 -    }
  24.751 -
  24.752 -    free(hvm_buf);
  24.753 -    free(pfn_batch);
  24.754 -    free(to_send);
  24.755 -    free(to_skip);
  24.756 -
  24.757 -    return !!rc;
  24.758 -}
    25.1 --- a/tools/libxc/xc_linux_save.c	Thu Apr 12 10:26:42 2007 -0600
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,1414 +0,0 @@
    25.4 -/******************************************************************************
    25.5 - * xc_linux_save.c
    25.6 - *
    25.7 - * Save the state of a running Linux session.
    25.8 - *
    25.9 - * Copyright (c) 2003, K A Fraser.
   25.10 - */
   25.11 -
   25.12 -#include <inttypes.h>
   25.13 -#include <time.h>
   25.14 -#include <stdlib.h>
   25.15 -#include <unistd.h>
   25.16 -#include <sys/time.h>
   25.17 -
   25.18 -#include "xc_private.h"
   25.19 -#include "xc_dom.h"
   25.20 -#include "xg_private.h"
   25.21 -#include "xg_save_restore.h"
   25.22 -
   25.23 -/*
   25.24 -** Default values for important tuning parameters. Can override by passing
   25.25 -** non-zero replacement values to xc_linux_save().
   25.26 -**
   25.27 -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   25.28 -**
   25.29 -*/
   25.30 -#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   25.31 -#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
   25.32 -
   25.33 -/* max mfn of the whole machine */
   25.34 -static unsigned long max_mfn;
   25.35 -
   25.36 -/* virtual starting address of the hypervisor */
   25.37 -static unsigned long hvirt_start;
   25.38 -
   25.39 -/* #levels of page tables used by the current guest */
   25.40 -static unsigned int pt_levels;
   25.41 -
   25.42 -/* number of pfns this guest has (i.e. number of entries in the P2M) */
   25.43 -static unsigned long p2m_size;
   25.44 -
   25.45 -/* Live mapping of the table mapping each PFN to its current MFN. */
   25.46 -static xen_pfn_t *live_p2m = NULL;
   25.47 -
   25.48 -/* Live mapping of system MFN to PFN table. */
   25.49 -static xen_pfn_t *live_m2p = NULL;
   25.50 -static unsigned long m2p_mfn0;
   25.51 -
   25.52 -/* grep fodder: machine_to_phys */
   25.53 -
   25.54 -#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
   25.55 -
   25.56 -/*
   25.57 - * Returns TRUE if the given machine frame number has a unique mapping
   25.58 - * in the guest's pseudophysical map.
   25.59 - */
   25.60 -#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
   25.61 -    (((_mfn) < (max_mfn)) &&                    \
   25.62 -     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
   25.63 -      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
   25.64 -
   25.65 -/* Returns TRUE if MFN is successfully converted to a PFN. */
   25.66 -#define translate_mfn_to_pfn(_pmfn)                             \
   25.67 -({                                                              \
   25.68 -    unsigned long mfn = *(_pmfn);                               \
   25.69 -    int _res = 1;                                               \
   25.70 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
   25.71 -        _res = 0;                                               \
   25.72 -    else                                                        \
   25.73 -        *(_pmfn) = mfn_to_pfn(mfn);                             \
   25.74 -    _res;                                                       \
   25.75 -})
   25.76 -
   25.77 -/*
   25.78 -** During (live) save/migrate, we maintain a number of bitmaps to track
   25.79 -** which pages we have to send, to fixup, and to skip.
   25.80 -*/
   25.81 -
   25.82 -#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   25.83 -#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
   25.84 -
   25.85 -#define BITMAP_ENTRY(_nr,_bmap) \
   25.86 -   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   25.87 -
   25.88 -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   25.89 -
   25.90 -static inline int test_bit (int nr, volatile void * addr)
   25.91 -{
   25.92 -    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
   25.93 -}
   25.94 -
   25.95 -static inline void clear_bit (int nr, volatile void * addr)
   25.96 -{
   25.97 -    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
   25.98 -}
   25.99 -
  25.100 -static inline void set_bit ( int nr, volatile void * addr)
  25.101 -{
  25.102 -    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
  25.103 -}
  25.104 -
  25.105 -/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
  25.106 -static inline unsigned int hweight32(unsigned int w)
  25.107 -{
  25.108 -    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
  25.109 -    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
  25.110 -    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
  25.111 -    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
  25.112 -    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
  25.113 -}
  25.114 -
  25.115 -static inline int count_bits ( int nr, volatile void *addr)
  25.116 -{
  25.117 -    int i, count = 0;
  25.118 -    volatile unsigned long *p = (volatile unsigned long *)addr;
  25.119 -    /* We know that the array is padded to unsigned long. */
  25.120 -    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
  25.121 -        count += hweight32(*p);
  25.122 -    return count;
  25.123 -}
  25.124 -
  25.125 -static inline int permute( int i, int nr, int order_nr  )
  25.126 -{
  25.127 -    /* Need a simple permutation function so that we scan pages in a
  25.128 -       pseudo random order, enabling us to get a better estimate of
  25.129 -       the domain's page dirtying rate as we go (there are often
  25.130 -       contiguous ranges of pfns that have similar behaviour, and we
  25.131 -       want to mix them up. */
  25.132 -
  25.133 -    /* e.g. nr->oder 15->4 16->4 17->5 */
  25.134 -    /* 512MB domain, 128k pages, order 17 */
  25.135 -
  25.136 -    /*
  25.137 -      QPONMLKJIHGFEDCBA
  25.138 -             QPONMLKJIH
  25.139 -      GFEDCBA
  25.140 -     */
  25.141 -
  25.142 -    /*
  25.143 -      QPONMLKJIHGFEDCBA
  25.144 -                  EDCBA
  25.145 -             QPONM
  25.146 -      LKJIHGF
  25.147 -      */
  25.148 -
  25.149 -    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  25.150 -    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  25.151 -
  25.152 -    return i;
  25.153 -}
  25.154 -
  25.155 -static uint64_t tv_to_us(struct timeval *new)
  25.156 -{
  25.157 -    return (new->tv_sec * 1000000) + new->tv_usec;
  25.158 -}
  25.159 -
  25.160 -static uint64_t llgettimeofday(void)
  25.161 -{
  25.162 -    struct timeval now;
  25.163 -    gettimeofday(&now, NULL);
  25.164 -    return tv_to_us(&now);
  25.165 -}
  25.166 -
  25.167 -static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  25.168 -{
  25.169 -    return (((new->tv_sec - old->tv_sec)*1000000) +
  25.170 -            (new->tv_usec - old->tv_usec));
  25.171 -}
  25.172 -
  25.173 -static int noncached_write(int fd, int live, void *buffer, int len) 
  25.174 -{
  25.175 -    static int write_count = 0;
  25.176 -
  25.177 -    int rc = write(fd,buffer,len);
  25.178 -
  25.179 -    write_count += len;
  25.180 -    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
  25.181 -    {
  25.182 -        /* Time to discard cache - dont care if this fails */
  25.183 -        discard_file_cache(fd, 0 /* no flush */);
  25.184 -        write_count = 0;
  25.185 -    }
  25.186 -
  25.187 -    return rc;
  25.188 -}
  25.189 -
  25.190 -#ifdef ADAPTIVE_SAVE
  25.191 -
  25.192 -/*
  25.193 -** We control the rate at which we transmit (or save) to minimize impact
  25.194 -** on running domains (including the target if we're doing live migrate).
  25.195 -*/
  25.196 -
  25.197 -#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
  25.198 -#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
  25.199 -
  25.200 -/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
  25.201 -#define RATE_TO_BTU      781250
  25.202 -
  25.203 -/* Amount in bytes we allow ourselves to send in a burst */
  25.204 -#define BURST_BUDGET (100*1024)
  25.205 -
  25.206 -/* We keep track of the current and previous transmission rate */
  25.207 -static int mbit_rate, ombit_rate = 0;
  25.208 -
  25.209 -/* Have we reached the maximum transmission rate? */
  25.210 -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
  25.211 -
  25.212 -static inline void initialize_mbit_rate()
  25.213 -{
  25.214 -    mbit_rate = START_MBIT_RATE;
  25.215 -}
  25.216 -
  25.217 -static int ratewrite(int io_fd, int live, void *buf, int n)
  25.218 -{
  25.219 -    static int budget = 0;
  25.220 -    static int burst_time_us = -1;
  25.221 -    static struct timeval last_put = { 0 };
  25.222 -    struct timeval now;
  25.223 -    struct timespec delay;
  25.224 -    long long delta;
  25.225 -
  25.226 -    if ( START_MBIT_RATE == 0 )
  25.227 -        return noncached_write(io_fd, live, buf, n);
  25.228 -
  25.229 -    budget -= n;
  25.230 -    if ( budget < 0 )
  25.231 -    {
  25.232 -        if ( mbit_rate != ombit_rate )
  25.233 -        {
  25.234 -            burst_time_us = RATE_TO_BTU / mbit_rate;
  25.235 -            ombit_rate = mbit_rate;
  25.236 -            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
  25.237 -                    mbit_rate, BURST_BUDGET, burst_time_us);
  25.238 -        }
  25.239 -        if ( last_put.tv_sec == 0 )
  25.240 -        {
  25.241 -            budget += BURST_BUDGET;
  25.242 -            gettimeofday(&last_put, NULL);
  25.243 -        }
  25.244 -        else
  25.245 -        {
  25.246 -            while ( budget < 0 )
  25.247 -            {
  25.248 -                gettimeofday(&now, NULL);
  25.249 -                delta = tv_delta(&now, &last_put);
  25.250 -                while ( delta > burst_time_us )
  25.251 -                {
  25.252 -                    budget += BURST_BUDGET;
  25.253 -                    last_put.tv_usec += burst_time_us;
  25.254 -                    if ( last_put.tv_usec > 1000000 )
  25.255 -                    {
  25.256 -                        last_put.tv_usec -= 1000000;
  25.257 -                        last_put.tv_sec++;
  25.258 -                    }
  25.259 -                    delta -= burst_time_us;
  25.260 -                }
  25.261 -                if ( budget > 0 )
  25.262 -                    break;
  25.263 -                delay.tv_sec = 0;
  25.264 -                delay.tv_nsec = 1000 * (burst_time_us - delta);
  25.265 -                while ( delay.tv_nsec > 0 )
  25.266 -                    if ( nanosleep(&delay, &delay) == 0 )
  25.267 -                        break;
  25.268 -            }
  25.269 -        }
  25.270 -    }
  25.271 -    return noncached_write(io_fd, live, buf, n);
  25.272 -}
  25.273 -
  25.274 -#else /* ! ADAPTIVE SAVE */
  25.275 -
  25.276 -#define RATE_IS_MAX() (0)
  25.277 -#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
  25.278 -#define initialize_mbit_rate()
  25.279 -
  25.280 -#endif
  25.281 -
  25.282 -static inline ssize_t write_exact(int fd, void *buf, size_t count)
  25.283 -{
  25.284 -    return (write(fd, buf, count) == count);
  25.285 -}
  25.286 -
  25.287 -static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  25.288 -                       xc_shadow_op_stats_t *stats, int print)
  25.289 -{
  25.290 -    static struct timeval wall_last;
  25.291 -    static long long      d0_cpu_last;
  25.292 -    static long long      d1_cpu_last;
  25.293 -
  25.294 -    struct timeval        wall_now;
  25.295 -    long long             wall_delta;
  25.296 -    long long             d0_cpu_now, d0_cpu_delta;
  25.297 -    long long             d1_cpu_now, d1_cpu_delta;
  25.298 -
  25.299 -    gettimeofday(&wall_now, NULL);
  25.300 -
  25.301 -    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
  25.302 -    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
  25.303 -
  25.304 -    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  25.305 -        DPRINTF("ARRHHH!!\n");
  25.306 -
  25.307 -    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  25.308 -    if ( wall_delta == 0 )
  25.309 -        wall_delta = 1;
  25.310 -
  25.311 -    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
  25.312 -    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
  25.313 -
  25.314 -    if ( print )
  25.315 -        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  25.316 -                "dirtied %dMb/s %" PRId32 " pages\n",
  25.317 -                wall_delta,
  25.318 -                (int)((d0_cpu_delta*100)/wall_delta),
  25.319 -                (int)((d1_cpu_delta*100)/wall_delta),
  25.320 -                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  25.321 -                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  25.322 -                stats->dirty_count);
  25.323 -
  25.324 -#ifdef ADAPTIVE_SAVE
  25.325 -    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
  25.326 -    {
  25.327 -        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
  25.328 -            + 50;
  25.329 -        if ( mbit_rate > MAX_MBIT_RATE )
  25.330 -            mbit_rate = MAX_MBIT_RATE;
  25.331 -    }
  25.332 -#endif
  25.333 -
  25.334 -    d0_cpu_last = d0_cpu_now;
  25.335 -    d1_cpu_last = d1_cpu_now;
  25.336 -    wall_last   = wall_now;
  25.337 -
  25.338 -    return 0;
  25.339 -}
  25.340 -
  25.341 -
  25.342 -static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
  25.343 -                          unsigned long *arr, int runs)
  25.344 -{
  25.345 -    long long start, now;
  25.346 -    xc_shadow_op_stats_t stats;
  25.347 -    int j;
  25.348 -
  25.349 -    start = llgettimeofday();
  25.350 -
  25.351 -    for ( j = 0; j < runs; j++ )
  25.352 -    {
  25.353 -        int i;
  25.354 -
  25.355 -        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  25.356 -                          arr, p2m_size, NULL, 0, NULL);
  25.357 -        DPRINTF("#Flush\n");
  25.358 -        for ( i = 0; i < 40; i++ )
  25.359 -        {
  25.360 -            usleep(50000);
  25.361 -            now = llgettimeofday();
  25.362 -            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  25.363 -                              NULL, 0, NULL, 0, &stats);
  25.364 -            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  25.365 -                    ((now-start)+500)/1000,
  25.366 -                    stats.fault_count, stats.dirty_count);
  25.367 -        }
  25.368 -    }
  25.369 -
  25.370 -    return -1;
  25.371 -}
  25.372 -
  25.373 -
  25.374 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  25.375 -                             int dom, xc_dominfo_t *info,
  25.376 -                             vcpu_guest_context_t *ctxt)
  25.377 -{
  25.378 -    int i = 0;
  25.379 -
  25.380 -    if ( !(*suspend)(dom) )
  25.381 -    {
  25.382 -        ERROR("Suspend request failed");
  25.383 -        return -1;
  25.384 -    }
  25.385 -
  25.386 - retry:
  25.387 -
  25.388 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  25.389 -    {
  25.390 -        ERROR("Could not get domain info");
  25.391 -        return -1;
  25.392 -    }
  25.393 -
  25.394 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
  25.395 -        ERROR("Could not get vcpu context");
  25.396 -
  25.397 -
  25.398 -    if ( info->dying )
  25.399 -    {
  25.400 -        ERROR("domain is dying");
  25.401 -        return -1;
  25.402 -    }
  25.403 -
  25.404 -    if ( info->crashed )
  25.405 -    {
  25.406 -        ERROR("domain has crashed");
  25.407 -        return -1;
  25.408 -    }
  25.409 -
  25.410 -    if ( info->shutdown )
  25.411 -    {
  25.412 -        switch ( info->shutdown_reason )
  25.413 -        {
  25.414 -        case SHUTDOWN_poweroff:
  25.415 -        case SHUTDOWN_reboot:
  25.416 -            ERROR("domain has shut down");
  25.417 -            return -1;
  25.418 -        case SHUTDOWN_suspend:
  25.419 -            return 0;
  25.420 -        case SHUTDOWN_crash:
  25.421 -            ERROR("domain has crashed");
  25.422 -            return -1;
  25.423 -        }
  25.424 -    }
  25.425 -
  25.426 -    if ( info->paused )
  25.427 -    {
  25.428 -        /* Try unpausing domain, wait, and retest. */
  25.429 -        xc_domain_unpause( xc_handle, dom );
  25.430 -        ERROR("Domain was paused. Wait and re-test.");
  25.431 -        usleep(10000); /* 10ms */
  25.432 -        goto retry;
  25.433 -    }
  25.434 -
  25.435 -    if ( ++i < 100 )
  25.436 -    {
  25.437 -        ERROR("Retry suspend domain");
  25.438 -        usleep(10000); /* 10ms */
  25.439 -        goto retry;
  25.440 -    }
  25.441 -
  25.442 -    ERROR("Unable to suspend domain.");
  25.443 -
  25.444 -    return -1;
  25.445 -}
  25.446 -
  25.447 -/*
  25.448 -** Map the top-level page of MFNs from the guest. The guest might not have
  25.449 -** finished resuming from a previous restore operation, so we wait a while for
  25.450 -** it to update the MFN to a reasonable value.
  25.451 -*/
  25.452 -static void *map_frame_list_list(int xc_handle, uint32_t dom,
  25.453 -                                 shared_info_t *shinfo)
  25.454 -{
  25.455 -    int count = 100;
  25.456 -    void *p;
  25.457 -
  25.458 -    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
  25.459 -        usleep(10000);
  25.460 -
  25.461 -    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
  25.462 -    {
  25.463 -        ERROR("Timed out waiting for frame list updated.");
  25.464 -        return NULL;
  25.465 -    }
  25.466 -
  25.467 -    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
  25.468 -                             shinfo->arch.pfn_to_mfn_frame_list_list);
  25.469 -    if ( p == NULL )
  25.470 -        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
  25.471 -
  25.472 -    return p;
  25.473 -}
  25.474 -
  25.475 -/*
  25.476 -** During transfer (or in the state file), all page-table pages must be
  25.477 -** converted into a 'canonical' form where references to actual mfns
  25.478 -** are replaced with references to the corresponding pfns.
  25.479 -**
  25.480 -** This function performs the appropriate conversion, taking into account
  25.481 -** which entries do not require canonicalization (in particular, those
  25.482 -** entries which map the virtual address reserved for the hypervisor).
  25.483 -*/
  25.484 -static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
  25.485 -                           const void *spage, void *dpage)
  25.486 -{
  25.487 -
  25.488 -    int i, pte_last, xen_start, xen_end, race = 0; 
  25.489 -    uint64_t pte;
  25.490 -
  25.491 -    /*
  25.492 -    ** We need to determine which entries in this page table hold
  25.493 -    ** reserved hypervisor mappings. This depends on the current
  25.494 -    ** page table type as well as the number of paging levels.
  25.495 -    */
  25.496 -    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
  25.497 -
  25.498 -    if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  25.499 -        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
  25.500 -
  25.501 -    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
  25.502 -        xen_start = L3_PAGETABLE_ENTRIES_PAE;
  25.503 -
  25.504 -    /*
  25.505 -    ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
  25.506 -    ** We can spot this by looking for the guest linear mapping which
  25.507 -    ** Xen always ensures is present in that L2. Guests must ensure
  25.508 -    ** that this check will fail for other L2s.
  25.509 -    */
  25.510 -    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  25.511 -    {
  25.512 -        int hstart;
  25.513 -        uint64_t he;
  25.514 -
  25.515 -        hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  25.516 -        he = ((const uint64_t *) spage)[hstart];
  25.517 -
  25.518 -        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  25.519 -        {
  25.520 -            /* hvirt starts with xen stuff... */
  25.521 -            xen_start = hstart;
  25.522 -        }
  25.523 -        else if ( hvirt_start != 0xf5800000 )
  25.524 -        {
  25.525 -            /* old L2s from before hole was shrunk... */
  25.526 -            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  25.527 -            he = ((const uint64_t *) spage)[hstart];
  25.528 -            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  25.529 -                xen_start = hstart;
  25.530 -        }
  25.531 -    }
  25.532 -
  25.533 -    if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
  25.534 -    {
  25.535 -        /*
  25.536 -        ** XXX SMH: should compute these from hvirt_start (which we have)
  25.537 -        ** and hvirt_end (which we don't)
  25.538 -        */
  25.539 -        xen_start = 256;
  25.540 -        xen_end   = 272;
  25.541 -    }
  25.542 -
  25.543 -    /* Now iterate through the page table, canonicalizing each PTE */
  25.544 -    for (i = 0; i < pte_last; i++ )
  25.545 -    {
  25.546 -        unsigned long pfn, mfn;
  25.547 -
  25.548 -        if ( pt_levels == 2 )
  25.549 -            pte = ((const uint32_t*)spage)[i];
  25.550 -        else
  25.551 -            pte = ((const uint64_t*)spage)[i];
  25.552 -
  25.553 -        if ( (i >= xen_start) && (i < xen_end) )
  25.554 -            pte = 0;
  25.555 -
  25.556 -        if ( pte & _PAGE_PRESENT )
  25.557 -        {
  25.558 -            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
  25.559 -            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
  25.560 -            {
  25.561 -                /* This will happen if the type info is stale which
  25.562 -                   is quite feasible under live migration */
  25.563 -                pfn  = 0;  /* zap it - we'll retransmit this page later */
  25.564 -                race = 1;  /* inform the caller of race; fatal if !live */ 
  25.565 -            }
  25.566 -            else
  25.567 -                pfn = mfn_to_pfn(mfn);
  25.568 -
  25.569 -            pte &= ~MADDR_MASK_X86;
  25.570 -            pte |= (uint64_t)pfn << PAGE_SHIFT;
  25.571 -
  25.572 -            /*
  25.573 -             * PAE guest L3Es can contain these flags when running on
  25.574 -             * a 64bit hypervisor. We zap these here to avoid any
  25.575 -             * surprise at restore time...
  25.576 -             */
  25.577 -            if ( (pt_levels == 3) &&
  25.578 -                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
  25.579 -                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
  25.580 -                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
  25.581 -        }
  25.582 -
  25.583 -        if ( pt_levels == 2 )
  25.584 -            ((uint32_t*)dpage)[i] = pte;
  25.585 -        else
  25.586 -            ((uint64_t*)dpage)[i] = pte;
  25.587 -    }
  25.588 -
  25.589 -    return race;
  25.590 -}
  25.591 -
  25.592 -static xen_pfn_t *xc_map_m2p(int xc_handle,
  25.593 -                                 unsigned long max_mfn,
  25.594 -                                 int prot)
  25.595 -{
  25.596 -    struct xen_machphys_mfn_list xmml;
  25.597 -    privcmd_mmap_entry_t *entries;
  25.598 -    unsigned long m2p_chunks, m2p_size;
  25.599 -    xen_pfn_t *m2p;
  25.600 -    xen_pfn_t *extent_start;
  25.601 -    int i, rc;
  25.602 -
  25.603 -    m2p_size   = M2P_SIZE(max_mfn);
  25.604 -    m2p_chunks = M2P_CHUNKS(max_mfn);
  25.605 -
  25.606 -    xmml.max_extents = m2p_chunks;
  25.607 -    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
  25.608 -    {
  25.609 -        ERROR("failed to allocate space for m2p mfns");
  25.610 -        return NULL;
  25.611 -    }
  25.612 -    set_xen_guest_handle(xmml.extent_start, extent_start);
  25.613 -
  25.614 -    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
  25.615 -         (xmml.nr_extents != m2p_chunks) )
  25.616 -    {
  25.617 -        ERROR("xc_get_m2p_mfns");
  25.618 -        return NULL;
  25.619 -    }
  25.620 -
  25.621 -    if ( (m2p = mmap(NULL, m2p_size, prot,
  25.622 -                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
  25.623 -    {
  25.624 -        ERROR("failed to mmap m2p");
  25.625 -        return NULL;
  25.626 -    }
  25.627 -
  25.628 -    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
  25.629 -    {
  25.630 -        ERROR("failed to allocate space for mmap entries");
  25.631 -        return NULL;
  25.632 -    }
  25.633 -
  25.634 -    for ( i = 0; i < m2p_chunks; i++ )
  25.635 -    {
  25.636 -        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
  25.637 -        entries[i].mfn = extent_start[i];
  25.638 -        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
  25.639 -    }
  25.640 -
  25.641 -    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
  25.642 -                                     entries, m2p_chunks)) < 0 )
  25.643 -    {
  25.644 -        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
  25.645 -        return NULL;
  25.646 -    }
  25.647 -
  25.648 -    m2p_mfn0 = entries[0].mfn;
  25.649 -
  25.650 -    free(extent_start);
  25.651 -    free(entries);
  25.652 -
  25.653 -    return m2p;
  25.654 -}
  25.655 -
  25.656 -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  25.657 -                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
  25.658 -{
  25.659 -    xc_dominfo_t info;
  25.660 -
  25.661 -    int rc = 1, i, j, last_iter, iter = 0;
  25.662 -    int live  = (flags & XCFLAGS_LIVE);
  25.663 -    int debug = (flags & XCFLAGS_DEBUG);
  25.664 -    int race = 0, sent_last_iter, skip_this_iter;
  25.665 -
  25.666 -    /* The new domain's shared-info frame number. */
  25.667 -    unsigned long shared_info_frame;
  25.668 -
  25.669 -    /* A copy of the CPU context of the guest. */
  25.670 -    vcpu_guest_context_t ctxt;
  25.671 -
  25.672 -    /* A table containg the type of each PFN (/not/ MFN!). */
  25.673 -    unsigned long *pfn_type = NULL;
  25.674 -    unsigned long *pfn_batch = NULL;
  25.675 -
  25.676 -    /* A temporary mapping, and a copy, of one frame of guest memory. */
  25.677 -    char page[PAGE_SIZE];
  25.678 -
  25.679 -    /* Double and single indirect references to the live P2M table */
  25.680 -    xen_pfn_t *live_p2m_frame_list_list = NULL;
  25.681 -    xen_pfn_t *live_p2m_frame_list = NULL;
  25.682 -
  25.683 -    /* A copy of the pfn-to-mfn table frame list. */
  25.684 -    xen_pfn_t *p2m_frame_list = NULL;
  25.685 -
  25.686 -    /* Live mapping of shared info structure */
  25.687 -    shared_info_t *live_shinfo = NULL;
  25.688 -
  25.689 -    /* base of the region in which domain memory is mapped */
  25.690 -    unsigned char *region_base = NULL;
  25.691 -
  25.692 -    /* power of 2 order of p2m_size */
  25.693 -    int order_nr;
  25.694 -
  25.695 -    /* bitmap of pages:
  25.696 -       - that should be sent this iteration (unless later marked as skip);
  25.697 -       - to skip this iteration because already dirty;
  25.698 -       - to fixup by sending at the end if not already resent; */
  25.699 -    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
  25.700 -
  25.701 -    xc_shadow_op_stats_t stats;
  25.702 -
  25.703 -    unsigned long needed_to_fix = 0;
  25.704 -    unsigned long total_sent    = 0;
  25.705 -
  25.706 -    uint64_t vcpumap = 1ULL;
  25.707 -
  25.708 -    /* If no explicit control parameters given, use defaults */
  25.709 -    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  25.710 -    max_factor = max_factor ? : DEF_MAX_FACTOR;
  25.711 -
  25.712 -    initialize_mbit_rate();
  25.713 -
  25.714 -    if ( !get_platform_info(xc_handle, dom,
  25.715 -                            &max_mfn, &hvirt_start, &pt_levels) )
  25.716 -    {
  25.717 -        ERROR("Unable to get platform info.");
  25.718 -        return 1;
  25.719 -    }
  25.720 -
  25.721 -    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  25.722 -    {
  25.723 -        ERROR("Could not get domain info");
  25.724 -        return 1;
  25.725 -    }
  25.726 -
  25.727 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  25.728 -    {
  25.729 -        ERROR("Could not get vcpu context");
  25.730 -        goto out;
  25.731 -    }
  25.732 -    shared_info_frame = info.shared_info_frame;
  25.733 -
  25.734 -    /* Map the shared info frame */
  25.735 -    if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  25.736 -                                              PROT_READ, shared_info_frame)) )
  25.737 -    {
  25.738 -        ERROR("Couldn't map live_shinfo");
  25.739 -        goto out;
  25.740 -    }
  25.741 -
  25.742 -    p2m_size = live_shinfo->arch.max_pfn;
  25.743 -
  25.744 -    live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
  25.745 -                                                   live_shinfo);
  25.746 -    if ( !live_p2m_frame_list_list )
  25.747 -        goto out;
  25.748 -
  25.749 -    live_p2m_frame_list =
  25.750 -        xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  25.751 -                             live_p2m_frame_list_list,
  25.752 -                             P2M_FLL_ENTRIES);
  25.753 -    if ( !live_p2m_frame_list )
  25.754 -    {
  25.755 -        ERROR("Couldn't map p2m_frame_list");
  25.756 -        goto out;
  25.757 -    }
  25.758 -
  25.759 -    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
  25.760 -       the guest must not change which frames are used for this purpose.
  25.761 -       (its not clear why it would want to change them, and we'll be OK
  25.762 -       from a safety POV anyhow. */
  25.763 -
  25.764 -    live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  25.765 -                                    live_p2m_frame_list,
  25.766 -                                    P2M_FL_ENTRIES);
  25.767 -    if ( !live_p2m )
  25.768 -    {
  25.769 -        ERROR("Couldn't map p2m table");
  25.770 -        goto out;
  25.771 -    }
  25.772 -
  25.773 -    /* Setup the mfn_to_pfn table mapping */
  25.774 -    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
  25.775 -    {
  25.776 -        ERROR("Failed to map live M2P table");
  25.777 -        goto out;
  25.778 -    }
  25.779 -
  25.780 -
  25.781 -    /* Get a local copy of the live_P2M_frame_list */
  25.782 -    if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
  25.783 -    {
  25.784 -        ERROR("Couldn't allocate p2m_frame_list array");
  25.785 -        goto out;
  25.786 -    }
  25.787 -    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
  25.788 -
  25.789 -    /* Canonicalise the pfn-to-mfn table frame-number list. */
  25.790 -    for ( i = 0; i < p2m_size; i += fpp )
  25.791 -    {
  25.792 -        if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
  25.793 -        {
  25.794 -            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
  25.795 -            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
  25.796 -                  (uint64_t)p2m_frame_list[i/fpp]);
  25.797 -            goto out;
  25.798 -        }
  25.799 -    }
  25.800 -
  25.801 -    /* Domain is still running at this point */
  25.802 -    if ( live )
  25.803 -    {
  25.804 -        /* Live suspend. Enable log-dirty mode. */
  25.805 -        if ( xc_shadow_control(xc_handle, dom,
  25.806 -                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  25.807 -                               NULL, 0, NULL, 0, NULL) < 0 )
  25.808 -        {
  25.809 -            ERROR("Couldn't enable shadow mode");
  25.810 -            goto out;
  25.811 -        }
  25.812 -    }
  25.813 -    else
  25.814 -    {
  25.815 -        /* This is a non-live suspend. Suspend the domain .*/
  25.816 -        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
  25.817 -        {
  25.818 -            ERROR("Domain appears not to have suspended");
  25.819 -            goto out;
  25.820 -        }
  25.821 -    }
  25.822 -
  25.823 -    last_iter = !live;
  25.824 -
  25.825 -    /* pretend we sent all the pages last iteration */
  25.826 -    sent_last_iter = p2m_size;
  25.827 -
  25.828 -    /* calculate the power of 2 order of p2m_size, e.g.
  25.829 -       15->4 16->4 17->5 */
  25.830 -    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  25.831 -        continue;
  25.832 -
  25.833 -    /* Setup to_send / to_fix and to_skip bitmaps */
  25.834 -    to_send = malloc(BITMAP_SIZE);
  25.835 -    to_fix  = calloc(1, BITMAP_SIZE);
  25.836 -    to_skip = malloc(BITMAP_SIZE);
  25.837 -
  25.838 -    if ( !to_send || !to_fix || !to_skip )
  25.839 -    {
  25.840 -        ERROR("Couldn't allocate to_send array");
  25.841 -        goto out;
  25.842 -    }
  25.843 -
  25.844 -    memset(to_send, 0xff, BITMAP_SIZE);
  25.845 -
  25.846 -    if ( lock_pages(to_send, BITMAP_SIZE) )
  25.847 -    {
  25.848 -        ERROR("Unable to lock to_send");
  25.849 -        return 1;
  25.850 -    }
  25.851 -
  25.852 -    /* (to fix is local only) */
  25.853 -    if ( lock_pages(to_skip, BITMAP_SIZE) )
  25.854 -    {
  25.855 -        ERROR("Unable to lock to_skip");
  25.856 -        return 1;
  25.857 -    }
  25.858 -
  25.859 -    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
  25.860 -
  25.861 -    /* We want zeroed memory so use calloc rather than malloc. */
  25.862 -    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
  25.863 -    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  25.864 -    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
  25.865 -    {
  25.866 -        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
  25.867 -        errno = ENOMEM;
  25.868 -        goto out;
  25.869 -    }
  25.870 -
  25.871 -    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
  25.872 -    {
  25.873 -        ERROR("Unable to lock");
  25.874 -        goto out;
  25.875 -    }
  25.876 -
  25.877 -    /*
  25.878 -     * Quick belt and braces sanity check.
  25.879 -     */
  25.880 -    {
  25.881 -        int err=0;
  25.882 -        unsigned long mfn;
  25.883 -        for ( i = 0; i < p2m_size; i++ )
  25.884 -        {
  25.885 -            mfn = live_p2m[i];
  25.886 -            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
  25.887 -            {
  25.888 -                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
  25.889 -                        mfn, mfn_to_pfn(mfn));
  25.890 -                err++;
  25.891 -            }
  25.892 -        }
  25.893 -        DPRINTF("Had %d unexplained entries in p2m table\n", err);
  25.894 -    }
  25.895 -
  25.896 -    /* Start writing out the saved-domain record. */
  25.897 -    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
  25.898 -    {
  25.899 -        ERROR("write: p2m_size");
  25.900 -        goto out;
  25.901 -    }
  25.902 -
  25.903 -    /*
  25.904 -     * Write an extended-info structure to inform the restore code that
  25.905 -     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
  25.906 -     * slow paths in the restore code.
  25.907 -     */
  25.908 -    if ( (pt_levels == 3) &&
  25.909 -         (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
  25.910 -    {
  25.911 -        unsigned long signature = ~0UL;
  25.912 -        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
  25.913 -        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
  25.914 -        char chunk_sig[]  = "vcpu";
  25.915 -        if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
  25.916 -             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
  25.917 -             !write_exact(io_fd, &chunk_sig, 4) ||
  25.918 -             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
  25.919 -             !write_exact(io_fd, &ctxt,      sizeof(ctxt)) )
  25.920 -        {
  25.921 -            ERROR("write: extended info");
  25.922 -            goto out;
  25.923 -        }
  25.924 -    }
  25.925 -
  25.926 -    if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
  25.927 -    {
  25.928 -        ERROR("write: p2m_frame_list");
  25.929 -        goto out;
  25.930 -    }
  25.931 -
  25.932 -    print_stats(xc_handle, dom, 0, &stats, 0);
  25.933 -
  25.934 -    /* Now write out each data page, canonicalising page tables as we go... */
  25.935 -    for ( ; ; )
  25.936 -    {
  25.937 -        unsigned int prev_pc, sent_this_iter, N, batch;
  25.938 -
  25.939 -        iter++;
  25.940 -        sent_this_iter = 0;
  25.941 -        skip_this_iter = 0;
  25.942 -        prev_pc = 0;
  25.943 -        N = 0;
  25.944 -
  25.945 -        DPRINTF("Saving memory pages: iter %d   0%%", iter);
  25.946 -
  25.947 -        while ( N < p2m_size )
  25.948 -        {
  25.949 -            unsigned int this_pc = (N * 100) / p2m_size;
  25.950 -            int rc;
  25.951 -
  25.952 -            if ( (this_pc - prev_pc) >= 5 )
  25.953 -            {
  25.954 -                DPRINTF("\b\b\b\b%3d%%", this_pc);
  25.955 -                prev_pc = this_pc;
  25.956 -            }
  25.957 -
  25.958 -            if ( !last_iter )
  25.959 -            {
  25.960 -                /* Slightly wasteful to peek the whole array evey time,
  25.961 -                   but this is fast enough for the moment. */
  25.962 -                rc = xc_shadow_control(
  25.963 -                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
  25.964 -                    p2m_size, NULL, 0, NULL);
  25.965 -                if ( rc != p2m_size )
  25.966 -                {
  25.967 -                    ERROR("Error peeking shadow bitmap");
  25.968 -                    goto out;
  25.969 -                }
  25.970 -            }
  25.971 -
  25.972 -            /* load pfn_type[] with the mfn of all the pages we're doing in
  25.973 -               this batch. */
  25.974 -            for  ( batch = 0;
  25.975 -                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
  25.976 -                   N++ )
  25.977 -            {
  25.978 -                int n = permute(N, p2m_size, order_nr);
  25.979 -
  25.980 -                if ( debug )
  25.981 -                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
  25.982 -                            iter, (unsigned long)n, live_p2m[n],
  25.983 -                            test_bit(n, to_send),
  25.984 -                            mfn_to_pfn(live_p2m[n]&0xFFFFF));
  25.985 -
  25.986 -                if ( !last_iter &&
  25.987 -                     test_bit(n, to_send) &&
  25.988 -                     test_bit(n, to_skip) )
  25.989 -                    skip_this_iter++; /* stats keeping */
  25.990 -
  25.991 -                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
  25.992 -                       (test_bit(n, to_send) && last_iter) ||
  25.993 -                       (test_bit(n, to_fix)  && last_iter)) )
  25.994 -                    continue;
  25.995 -
  25.996 -                /*
  25.997 -                ** we get here if:
  25.998 -                **  1. page is marked to_send & hasn't already been re-dirtied
  25.999 -                **  2. (ignore to_skip in last iteration)
 25.1000 -                **  3. add in pages that still need fixup (net bufs)
 25.1001 -                */
 25.1002 -
 25.1003 -                pfn_batch[batch] = n;
 25.1004 -                pfn_type[batch]  = live_p2m[n];
 25.1005 -
 25.1006 -                if ( !is_mapped(pfn_type[batch]) )
 25.1007 -                {
 25.1008 -                    /*
 25.1009 -                    ** not currently in psuedo-physical map -- set bit
 25.1010 -                    ** in to_fix since we must send this page in last_iter
 25.1011 -                    ** unless its sent sooner anyhow, or it never enters
 25.1012 -                    ** pseudo-physical map (e.g. for ballooned down domains)
 25.1013 -                    */
 25.1014 -                    set_bit(n, to_fix);
 25.1015 -                    continue;
 25.1016 -                }
 25.1017 -
 25.1018 -                if ( last_iter &&
 25.1019 -                     test_bit(n, to_fix) &&
 25.1020 -                     !test_bit(n, to_send) )
 25.1021 -                {
 25.1022 -                    needed_to_fix++;
 25.1023 -                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
 25.1024 -                            iter, n, pfn_type[batch]);
 25.1025 -                }
 25.1026 -
 25.1027 -                clear_bit(n, to_fix);
 25.1028 -
 25.1029 -                batch++;
 25.1030 -            }
 25.1031 -
 25.1032 -            if ( batch == 0 )
 25.1033 -                goto skip; /* vanishingly unlikely... */
 25.1034 -
 25.1035 -            region_base = xc_map_foreign_batch(
 25.1036 -                xc_handle, dom, PROT_READ, pfn_type, batch);
 25.1037 -            if ( region_base == NULL )
 25.1038 -            {
 25.1039 -                ERROR("map batch failed");
 25.1040 -                goto out;
 25.1041 -            }
 25.1042 -
 25.1043 -            for ( j = 0; j < batch; j++ )
 25.1044 -                ((uint32_t *)pfn_type)[j] = pfn_type[j];
 25.1045 -            if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
 25.1046 -                                       (uint32_t *)pfn_type) )
 25.1047 -            {
 25.1048 -                ERROR("get_pfn_type_batch failed");
 25.1049 -                goto out;
 25.1050 -            }
 25.1051 -            for ( j = batch-1; j >= 0; j-- )
 25.1052 -                pfn_type[j] = ((uint32_t *)pfn_type)[j];
 25.1053 -
 25.1054 -            for ( j = 0; j < batch; j++ )
 25.1055 -            {
 25.1056 -
 25.1057 -                if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
 25.1058 -                     XEN_DOMCTL_PFINFO_XTAB )
 25.1059 -                {
 25.1060 -                    DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]);
 25.1061 -                    continue;
 25.1062 -                }
 25.1063 -
 25.1064 -                if ( debug )
 25.1065 -                    DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
 25.1066 -                            " sum= %08lx\n",
 25.1067 -                            iter,
 25.1068 -                            (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 25.1069 -                            pfn_batch[j],
 25.1070 -                            pfn_type[j],
 25.1071 -                            mfn_to_pfn(pfn_type[j] &
 25.1072 -                                       ~XEN_DOMCTL_PFINFO_LTAB_MASK),
 25.1073 -                            csum_page(region_base + (PAGE_SIZE*j)));
 25.1074 -
 25.1075 -                /* canonicalise mfn->pfn */
 25.1076 -                pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 25.1077 -                    pfn_batch[j];
 25.1078 -            }
 25.1079 -
 25.1080 -            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
 25.1081 -            {
 25.1082 -                ERROR("Error when writing to state file (2) (errno %d)",
 25.1083 -                      errno);
 25.1084 -                goto out;
 25.1085 -            }
 25.1086 -
 25.1087 -            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) )
 25.1088 -            {
 25.1089 -                ERROR("Error when writing to state file (3) (errno %d)",
 25.1090 -                      errno);
 25.1091 -                goto out;
 25.1092 -            }
 25.1093 -
 25.1094 -            /* entering this loop, pfn_type is now in pfns (Not mfns) */
 25.1095 -            for ( j = 0; j < batch; j++ )
 25.1096 -            {
 25.1097 -                unsigned long pfn, pagetype;
 25.1098 -                void *spage = (char *)region_base + (PAGE_SIZE*j);
 25.1099 -
 25.1100 -                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
 25.1101 -                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
 25.1102 -
 25.1103 -                /* write out pages in batch */
 25.1104 -                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
 25.1105 -                    continue;
 25.1106 -
 25.1107 -                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 25.1108 -
 25.1109 -                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
 25.1110 -                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
 25.1111 -                {
 25.1112 -                    /* We have a pagetable page: need to rewrite it. */
 25.1113 -                    race = 
 25.1114 -                        canonicalize_pagetable(pagetype, pfn, spage, page); 
 25.1115 -
 25.1116 -                    if ( race && !live )
 25.1117 -                    {
 25.1118 -                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
 25.1119 -                              pagetype);
 25.1120 -                        goto out;
 25.1121 -                    }
 25.1122 -
 25.1123 -                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
 25.1124 -                    {
 25.1125 -                        ERROR("Error when writing to state file (4)"
 25.1126 -                              " (errno %d)", errno);
 25.1127 -                        goto out;
 25.1128 -                    }
 25.1129 -                }
 25.1130 -                else
 25.1131 -                {
 25.1132 -                    /* We have a normal page: just write it directly. */
 25.1133 -                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
 25.1134 -                         PAGE_SIZE )
 25.1135 -                    {
 25.1136 -                        ERROR("Error when writing to state file (5)"
 25.1137 -                              " (errno %d)", errno);
 25.1138 -                        goto out;
 25.1139 -                    }
 25.1140 -                }
 25.1141 -            } /* end of the write out for this batch */
 25.1142 -
 25.1143 -            sent_this_iter += batch;
 25.1144 -
 25.1145 -            munmap(region_base, batch*PAGE_SIZE);
 25.1146 -
 25.1147 -        } /* end of this while loop for this iteration */
 25.1148 -
 25.1149 -      skip:
 25.1150 -
 25.1151 -        total_sent += sent_this_iter;
 25.1152 -
 25.1153 -        DPRINTF("\r %d: sent %d, skipped %d, ",
 25.1154 -                iter, sent_this_iter, skip_this_iter );
 25.1155 -
 25.1156 -        if ( last_iter )
 25.1157 -        {
 25.1158 -            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 25.1159 -
 25.1160 -            DPRINTF("Total pages sent= %ld (%.2fx)\n",
 25.1161 -                    total_sent, ((float)total_sent)/p2m_size );
 25.1162 -            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
 25.1163 -        }
 25.1164 -
 25.1165 -        if ( last_iter && debug )
 25.1166 -        {
 25.1167 -            int minusone = -1;
 25.1168 -            memset(to_send, 0xff, BITMAP_SIZE);
 25.1169 -            debug = 0;
 25.1170 -            DPRINTF("Entering debug resend-all mode\n");
 25.1171 -
 25.1172 -            /* send "-1" to put receiver into debug mode */
 25.1173 -            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
 25.1174 -            {
 25.1175 -                ERROR("Error when writing to state file (6) (errno %d)",
 25.1176 -                      errno);
 25.1177 -                goto out;
 25.1178 -            }
 25.1179 -
 25.1180 -            continue;
 25.1181 -        }
 25.1182 -
 25.1183 -        if ( last_iter )
 25.1184 -            break;
 25.1185 -
 25.1186 -        if ( live )
 25.1187 -        {
 25.1188 -            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
 25.1189 -                 (iter >= max_iters) ||
 25.1190 -                 (sent_this_iter+skip_this_iter < 50) ||
 25.1191 -                 (total_sent > p2m_size*max_factor) )
 25.1192 -            {
 25.1193 -                DPRINTF("Start last iteration\n");
 25.1194 -                last_iter = 1;
 25.1195 -
 25.1196 -                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
 25.1197 -                                       &ctxt) )
 25.1198 -                {
 25.1199 -                    ERROR("Domain appears not to have suspended");
 25.1200 -                    goto out;
 25.1201 -                }
 25.1202 -
 25.1203 -                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
 25.1204 -                        info.shared_info_frame,
 25.1205 -                        (unsigned long)ctxt.user_regs.eip,
 25.1206 -                        (unsigned long)ctxt.user_regs.edx);
 25.1207 -            }
 25.1208 -
 25.1209 -            if ( xc_shadow_control(xc_handle, dom, 
 25.1210 -                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
 25.1211 -                                   p2m_size, NULL, 0, &stats) != p2m_size )
 25.1212 -            {
 25.1213 -                ERROR("Error flushing shadow PT");
 25.1214 -                goto out;
 25.1215 -            }
 25.1216 -
 25.1217 -            sent_last_iter = sent_this_iter;
 25.1218 -
 25.1219 -            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
 25.1220 -
 25.1221 -        }
 25.1222 -    } /* end of infinite for loop */
 25.1223 -
 25.1224 -    DPRINTF("All memory is saved\n");
 25.1225 -
 25.1226 -    {
 25.1227 -        struct {
 25.1228 -            int minustwo;
 25.1229 -            int max_vcpu_id;
 25.1230 -            uint64_t vcpumap;
 25.1231 -        } chunk = { -2, info.max_vcpu_id };
 25.1232 -
 25.1233 -        if ( info.max_vcpu_id >= 64 )
 25.1234 -        {
 25.1235 -            ERROR("Too many VCPUS in guest!");
 25.1236 -            goto out;
 25.1237 -        }
 25.1238 -
 25.1239 -        for ( i = 1; i <= info.max_vcpu_id; i++ )
 25.1240 -        {
 25.1241 -            xc_vcpuinfo_t vinfo;
 25.1242 -            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
 25.1243 -                 vinfo.online )
 25.1244 -                vcpumap |= 1ULL << i;
 25.1245 -        }
 25.1246 -
 25.1247 -        chunk.vcpumap = vcpumap;
 25.1248 -        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
 25.1249 -        {
 25.1250 -            ERROR("Error when writing to state file (errno %d)", errno);
 25.1251 -            goto out;
 25.1252 -        }
 25.1253 -    }
 25.1254 -
 25.1255 -    /* Zero terminate */
 25.1256 -    i = 0;
 25.1257 -    if ( !write_exact(io_fd, &i, sizeof(int)) )
 25.1258 -    {
 25.1259 -        ERROR("Error when writing to state file (6') (errno %d)", errno);
 25.1260 -        goto out;
 25.1261 -    }
 25.1262 -
 25.1263 -    /* Send through a list of all the PFNs that were not in map at the close */
 25.1264 -    {
 25.1265 -        unsigned int i,j;
 25.1266 -        unsigned long pfntab[1024];
 25.1267 -
 25.1268 -        for ( i = 0, j = 0; i < p2m_size; i++ )
 25.1269 -        {
 25.1270 -            if ( !is_mapped(live_p2m[i]) )
 25.1271 -                j++;
 25.1272 -        }
 25.1273 -
 25.1274 -        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
 25.1275 -        {
 25.1276 -            ERROR("Error when writing to state file (6a) (errno %d)", errno);
 25.1277 -            goto out;
 25.1278 -        }
 25.1279 -
 25.1280 -        for ( i = 0, j = 0; i < p2m_size; )
 25.1281 -        {
 25.1282 -            if ( !is_mapped(live_p2m[i]) )
 25.1283 -                pfntab[j++] = i;
 25.1284 -
 25.1285 -            i++;
 25.1286 -            if ( (j == 1024) || (i == p2m_size) )
 25.1287 -            {
 25.1288 -                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
 25.1289 -                {
 25.1290 -                    ERROR("Error when writing to state file (6b) (errno %d)",
 25.1291 -                          errno);
 25.1292 -                    goto out;
 25.1293 -                }
 25.1294 -                j = 0;
 25.1295 -            }
 25.1296 -        }
 25.1297 -    }
 25.1298 -
 25.1299 -    /* Canonicalise the suspend-record frame number. */
 25.1300 -    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
 25.1301 -    {
 25.1302 -        ERROR("Suspend record is not in range of pseudophys map");
 25.1303 -        goto out;
 25.1304 -    }
 25.1305 -
 25.1306 -    for ( i = 0; i <= info.max_vcpu_id; i++ )
 25.1307 -    {
 25.1308 -        if ( !(vcpumap & (1ULL << i)) )
 25.1309 -            continue;
 25.1310 -
 25.1311 -        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
 25.1312 -        {
 25.1313 -            ERROR("No context for VCPU%d", i);
 25.1314 -            goto out;
 25.1315 -        }
 25.1316 -
 25.1317 -        /* Canonicalise each GDT frame number. */
 25.1318 -        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
 25.1319 -        {
 25.1320 -            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
 25.1321 -            {
 25.1322 -                ERROR("GDT frame is not in range of pseudophys map");
 25.1323 -                goto out;
 25.1324 -            }
 25.1325 -        }
 25.1326 -
 25.1327 -        /* Canonicalise the page table base pointer. */
 25.1328 -        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
 25.1329 -        {
 25.1330 -            ERROR("PT base is not in range of pseudophys map");
 25.1331 -            goto out;
 25.1332 -        }
 25.1333 -        ctxt.ctrlreg[3] = 
 25.1334 -            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 25.1335 -
 25.1336 -        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
 25.1337 -        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
 25.1338 -        {
 25.1339 -            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
 25.1340 -            {
 25.1341 -                ERROR("PT base is not in range of pseudophys map");
 25.1342 -                goto out;
 25.1343 -            }
 25.1344 -            /* Least-significant bit means 'valid PFN'. */
 25.1345 -            ctxt.ctrlreg[1] = 1 |
 25.1346 -                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
 25.1347 -        }
 25.1348 -
 25.1349 -        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
 25.1350 -        {
 25.1351 -            ERROR("Error when writing to state file (1) (errno %d)", errno);
 25.1352 -            goto out;
 25.1353 -        }
 25.1354 -    }
 25.1355 -
 25.1356 -    /*
 25.1357 -     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
 25.1358 -     */
 25.1359 -    memcpy(page, live_shinfo, PAGE_SIZE);
 25.1360 -    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
 25.1361 -    if ( !write_exact(io_fd, page, PAGE_SIZE) )
 25.1362 -    {
 25.1363 -        ERROR("Error when writing to state file (1) (errno %d)", errno);
 25.1364 -        goto out;
 25.1365 -    }
 25.1366 -
 25.1367 -    /* Success! */
 25.1368 -    rc = 0;
 25.1369 -
 25.1370 - out:
 25.1371 -
 25.1372 -    if ( live )
 25.1373 -    {
 25.1374 -        if ( xc_shadow_control(xc_handle, dom, 
 25.1375 -                               XEN_DOMCTL_SHADOW_OP_OFF,
 25.1376 -                               NULL, 0, NULL, 0, NULL) < 0 )
 25.1377 -            DPRINTF("Warning - couldn't disable shadow mode");
 25.1378 -    }
 25.1379 -
 25.1380 -    /* Flush last write and discard cache for file. */
 25.1381 -    discard_file_cache(io_fd, 1 /* flush */);
 25.1382 -
 25.1383 -    if ( live_shinfo )
 25.1384 -        munmap(live_shinfo, PAGE_SIZE);
 25.1385 -
 25.1386 -    if ( live_p2m_frame_list_list )
 25.1387 -        munmap(live_p2m_frame_list_list, PAGE_SIZE);
 25.1388 -
 25.1389 -    if ( live_p2m_frame_list )
 25.1390 -        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
 25.1391 -
 25.1392 -    if ( live_p2m )
 25.1393 -        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 25.1394 -
 25.1395 -    if ( live_m2p )
 25.1396 -        munmap(live_m2p, M2P_SIZE(max_mfn));
 25.1397 -
 25.1398 -    free(pfn_type);
 25.1399 -    free(pfn_batch);
 25.1400 -    free(to_send);
 25.1401 -    free(to_fix);
 25.1402 -    free(to_skip);
 25.1403 -
 25.1404 -    DPRINTF("Save exit rc=%d\n",rc);
 25.1405 -
 25.1406 -    return !!rc;
 25.1407 -}
 25.1408 -
 25.1409 -/*
 25.1410 - * Local variables:
 25.1411 - * mode: C
 25.1412 - * c-set-style: "BSD"
 25.1413 - * c-basic-offset: 4
 25.1414 - * tab-width: 4
 25.1415 - * indent-tabs-mode: nil
 25.1416 - * End:
 25.1417 - */
    26.1 --- a/tools/libxc/xc_resume.c	Thu Apr 12 10:26:42 2007 -0600
    26.2 +++ b/tools/libxc/xc_resume.c	Thu Apr 12 10:30:12 2007 -0600
    26.3 @@ -3,24 +3,71 @@
    26.4  #include "xg_save_restore.h"
    26.5  
    26.6  #if defined(__i386__) || defined(__x86_64__)
    26.7 +
    26.8 +#include <xen/foreign/x86_32.h>
    26.9 +#include <xen/foreign/x86_64.h>
   26.10 +#include <xen/hvm/params.h>
   26.11 +
   26.12 +/* Need to provide the right flavour of vcpu context for Xen */
   26.13 +typedef union
   26.14 +{
   26.15 +    vcpu_guest_context_x86_64_t c64;
   26.16 +    vcpu_guest_context_x86_32_t c32;   
   26.17 +    vcpu_guest_context_t c;
   26.18 +} vcpu_guest_context_either_t;
   26.19 +
   26.20  static int modify_returncode(int xc_handle, uint32_t domid)
   26.21  {
   26.22 -    vcpu_guest_context_t ctxt;
   26.23 +    vcpu_guest_context_either_t ctxt;
   26.24 +    xc_dominfo_t info;
   26.25 +    xen_capabilities_info_t caps;
   26.26      int rc;
   26.27  
   26.28 -    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt)) != 0 )
   26.29 +    if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
   26.30 +    {
   26.31 +        PERROR("Could not get domain info");
   26.32 +        return -1;
   26.33 +    }
   26.34 +
   26.35 +    /* HVM guests without PV drivers do not have a return code to modify. */
   26.36 +    if ( info.hvm )
   26.37 +    {
   26.38 +        unsigned long irq = 0;
   26.39 +        xc_get_hvm_param(xc_handle, domid, HVM_PARAM_CALLBACK_IRQ, &irq);
   26.40 +        if ( !irq )
   26.41 +            return 0;
   26.42 +    }
   26.43 +
   26.44 +    if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
   26.45 +    {
   26.46 +        PERROR("Could not get Xen capabilities\n");
   26.47 +        return -1;
   26.48 +    }
   26.49 +
   26.50 +    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
   26.51          return rc;
   26.52 -    ctxt.user_regs.eax = 1;
   26.53 -    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt)) != 0 )
   26.54 +
   26.55 +    if ( !info.hvm )
   26.56 +        ctxt.c.user_regs.eax = 1;
   26.57 +    else if ( strstr(caps, "x86_64") )
   26.58 +        ctxt.c64.user_regs.eax = 1;
   26.59 +    else
   26.60 +        ctxt.c32.user_regs.eax = 1;
   26.61 +
   26.62 +    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
   26.63          return rc;
   26.64  
   26.65      return 0;
   26.66  }
   26.67 +
   26.68  #else
   26.69 +
   26.70  static int modify_returncode(int xc_handle, uint32_t domid)
   26.71  {
   26.72      return 0;
   26.73 +
   26.74  }
   26.75 +
   26.76  #endif
   26.77  
   26.78  static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid)
   26.79 @@ -65,6 +112,12 @@ static int xc_domain_resume_any(int xc_h
   26.80       * (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN).
   26.81       */
   26.82  #if defined(__i386__) || defined(__x86_64__)
   26.83 +    if ( info.hvm )
   26.84 +    {
   26.85 +        ERROR("Cannot resume uncooperative HVM guests");
   26.86 +        return rc;
   26.87 +    }
   26.88 +
   26.89      /* Map the shared info frame */
   26.90      shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   26.91                                    PROT_READ, info.shared_info_frame);
    27.1 --- a/tools/libxc/xenctrl.h	Thu Apr 12 10:26:42 2007 -0600
    27.2 +++ b/tools/libxc/xenctrl.h	Thu Apr 12 10:30:12 2007 -0600
    27.3 @@ -840,6 +840,9 @@ const char *xc_error_code_to_desc(int co
    27.4   */
    27.5  xc_error_handler xc_set_error_handler(xc_error_handler handler);
    27.6  
    27.7 +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value);
    27.8 +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value);
    27.9 +
   27.10  /* PowerPC specific. */
   27.11  int xc_alloc_real_mode_area(int xc_handle,
   27.12                              uint32_t domid,
    28.1 --- a/tools/libxc/xenguest.h	Thu Apr 12 10:26:42 2007 -0600
    28.2 +++ b/tools/libxc/xenguest.h	Thu Apr 12 10:30:12 2007 -0600
    28.3 @@ -16,26 +16,19 @@
    28.4  
    28.5  
    28.6  /**
    28.7 - * This function will save a domain running Linux.
    28.8 + * This function will save a running domain.
    28.9   *
   28.10   * @parm xc_handle a handle to an open hypervisor interface
   28.11   * @parm fd the file descriptor to save a domain to
   28.12   * @parm dom the id of the domain
   28.13   * @return 0 on success, -1 on failure
   28.14   */
   28.15 -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   28.16 -                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   28.17 -                  int (*suspend)(int domid));
   28.18 +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   28.19 +                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   28.20 +                   int (*suspend)(int domid), int hvm,
   28.21 +                   void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
   28.22 +                   void (*qemu_flip_buffer)(int, int));     /* HVM only */
   28.23  
   28.24 -/**
   28.25 - * This function will save a hvm domain running unmodified guest.
   28.26 - * @return 0 on success, -1 on failure
   28.27 - */
   28.28 -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   28.29 -                uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   28.30 -                int (*suspend)(int domid),  
   28.31 -                void *(*init_qemu_maps)(int, unsigned), 
   28.32 -                void (*qemu_flip_buffer)(int, int));
   28.33  
   28.34  /**
   28.35   * This function will restore a saved domain.
   28.36 @@ -143,11 +136,6 @@ int xc_hvm_build_mem(int xc_handle,
   28.37                       const char *image_buffer,
   28.38                       unsigned long image_size);
   28.39  
   28.40 -int xc_set_hvm_param(
   28.41 -    int handle, domid_t dom, int param, unsigned long value);
   28.42 -int xc_get_hvm_param(
   28.43 -    int handle, domid_t dom, int param, unsigned long *value);
   28.44 -
   28.45  /* PowerPC specific. */
   28.46  int xc_prose_build(int xc_handle,
   28.47                     uint32_t domid,
    29.1 --- a/tools/libxc/xg_private.c	Thu Apr 12 10:26:42 2007 -0600
    29.2 +++ b/tools/libxc/xg_private.c	Thu Apr 12 10:30:12 2007 -0600
    29.3 @@ -198,17 +198,6 @@ unsigned long csum_page(void *page)
    29.4      return -1;
    29.5  }
    29.6  
    29.7 -__attribute__((weak)) 
    29.8 -    int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    29.9 -                    uint32_t max_factor, uint32_t flags,
   29.10 -                    int (*suspend)(int domid), 
   29.11 -                    void *(*init_qemu_maps)(int, unsigned), 
   29.12 -                    void (*qemu_flip_buffer)(int, int))
   29.13 -{
   29.14 -    errno = ENOSYS;
   29.15 -    return -1;
   29.16 -}
   29.17 -
   29.18  __attribute__((weak)) int xc_get_hvm_param(
   29.19      int handle, domid_t dom, int param, unsigned long *value)
   29.20  {
    30.1 --- a/tools/libxen/include/xen_host_cpu.h	Thu Apr 12 10:26:42 2007 -0600
    30.2 +++ b/tools/libxen/include/xen_host_cpu.h	Thu Apr 12 10:30:12 2007 -0600
    30.3 @@ -70,6 +70,7 @@ typedef struct xen_host_cpu_record
    30.4      char *modelname;
    30.5      char *stepping;
    30.6      char *flags;
    30.7 +    char *features;
    30.8      double utilisation;
    30.9  } xen_host_cpu_record;
   30.10  
   30.11 @@ -223,6 +224,13 @@ xen_host_cpu_get_flags(xen_session *sess
   30.12  
   30.13  
   30.14  /**
   30.15 + * Get the features field of the given host_cpu.
   30.16 + */
   30.17 +extern bool
   30.18 +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu);
   30.19 +
   30.20 +
   30.21 +/**
   30.22   * Get the utilisation field of the given host_cpu.
   30.23   */
   30.24  extern bool
    31.1 --- a/tools/libxen/include/xen_vm.h	Thu Apr 12 10:26:42 2007 -0600
    31.2 +++ b/tools/libxen/include/xen_vm.h	Thu Apr 12 10:30:12 2007 -0600
    31.3 @@ -838,6 +838,28 @@ xen_vm_set_vcpus_number_live(xen_session
    31.4  
    31.5  
    31.6  /**
    31.7 + * Add the given key-value pair to VM.VCPUs_params, and apply that
    31.8 + * value on the running VM.
    31.9 + */
   31.10 +extern bool
   31.11 +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value);
   31.12 +
   31.13 +
   31.14 +/**
   31.15 + * Set memory_dynamic_max in database and on running VM.
   31.16 + */
   31.17 +extern bool
   31.18 +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max);
   31.19 +
   31.20 +
   31.21 +/**
   31.22 + * Set memory_dynamic_min in database and on running VM.
   31.23 + */
   31.24 +extern bool
   31.25 +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min);
   31.26 +
   31.27 +
   31.28 +/**
   31.29   * Send the given key as a sysrq to this VM.  The key is specified as a
   31.30   * single character (a String of length 1).  This can only be called when the
   31.31   * specified VM is in the Running state.
    32.1 --- a/tools/libxen/include/xen_vm_metrics.h	Thu Apr 12 10:26:42 2007 -0600
    32.2 +++ b/tools/libxen/include/xen_vm_metrics.h	Thu Apr 12 10:30:12 2007 -0600
    32.3 @@ -22,6 +22,7 @@
    32.4  #include "xen_common.h"
    32.5  #include "xen_int_float_map.h"
    32.6  #include "xen_int_int_map.h"
    32.7 +#include "xen_string_set.h"
    32.8  #include "xen_string_string_map.h"
    32.9  #include "xen_vm_metrics_decl.h"
   32.10  
   32.11 @@ -70,6 +71,7 @@ typedef struct xen_vm_metrics_record
   32.12      xen_int_float_map *vcpus_utilisation;
   32.13      xen_int_int_map *vcpus_cpu;
   32.14      xen_string_string_map *vcpus_params;
   32.15 +    struct xen_string_set *state;
   32.16      time_t start_time;
   32.17      time_t last_updated;
   32.18  } xen_vm_metrics_record;
   32.19 @@ -210,6 +212,13 @@ xen_vm_metrics_get_vcpus_params(xen_sess
   32.20  
   32.21  
   32.22  /**
   32.23 + * Get the state field of the given VM_metrics.
   32.24 + */
   32.25 +extern bool
   32.26 +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics);
   32.27 +
   32.28 +
   32.29 +/**
   32.30   * Get the start_time field of the given VM_metrics.
   32.31   */
   32.32  extern bool
    33.1 --- a/tools/libxen/src/xen_host_cpu.c	Thu Apr 12 10:26:42 2007 -0600
    33.2 +++ b/tools/libxen/src/xen_host_cpu.c	Thu Apr 12 10:30:12 2007 -0600
    33.3 @@ -61,6 +61,9 @@ static const struct_member xen_host_cpu_
    33.4          { .key = "flags",
    33.5            .type = &abstract_type_string,
    33.6            .offset = offsetof(xen_host_cpu_record, flags) },
    33.7 +        { .key = "features",
    33.8 +          .type = &abstract_type_string,
    33.9 +          .offset = offsetof(xen_host_cpu_record, features) },
   33.10          { .key = "utilisation",
   33.11            .type = &abstract_type_float,
   33.12            .offset = offsetof(xen_host_cpu_record, utilisation) }
   33.13 @@ -90,6 +93,7 @@ xen_host_cpu_record_free(xen_host_cpu_re
   33.14      free(record->modelname);
   33.15      free(record->stepping);
   33.16      free(record->flags);
   33.17 +    free(record->features);
   33.18      free(record);
   33.19  }
   33.20  
   33.21 @@ -252,6 +256,23 @@ xen_host_cpu_get_flags(xen_session *sess
   33.22  
   33.23  
   33.24  bool
   33.25 +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu)
   33.26 +{
   33.27 +    abstract_value param_values[] =
   33.28 +        {
   33.29 +            { .type = &abstract_type_string,
   33.30 +              .u.string_val = host_cpu }
   33.31 +        };
   33.32 +
   33.33 +    abstract_type result_type = abstract_type_string;
   33.34 +
   33.35 +    *result = NULL;
   33.36 +    XEN_CALL_("host_cpu.get_features");
   33.37 +    return session->ok;
   33.38 +}
   33.39 +
   33.40 +
   33.41 +bool
   33.42  xen_host_cpu_get_utilisation(xen_session *session, double *result, xen_host_cpu host_cpu)
   33.43  {
   33.44      abstract_value param_values[] =
    34.1 --- a/tools/libxen/src/xen_vm.c	Thu Apr 12 10:26:42 2007 -0600
    34.2 +++ b/tools/libxen/src/xen_vm.c	Thu Apr 12 10:30:12 2007 -0600
    34.3 @@ -1610,6 +1610,56 @@ xen_vm_set_vcpus_number_live(xen_session
    34.4  
    34.5  
    34.6  bool
    34.7 +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value)
    34.8 +{
    34.9 +    abstract_value param_values[] =
   34.10 +        {
   34.11 +            { .type = &abstract_type_string,
   34.12 +              .u.string_val = self },
   34.13 +            { .type = &abstract_type_string,
   34.14 +              .u.string_val = key },
   34.15 +            { .type = &abstract_type_string,
   34.16 +              .u.string_val = value }
   34.17 +        };
   34.18 +
   34.19 +    xen_call_(session, "VM.add_to_VCPUs_params_live", param_values, 3, NULL, NULL);
   34.20 +    return session->ok;
   34.21 +}
   34.22 +
   34.23 +
   34.24 +bool
   34.25 +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max)
   34.26 +{
   34.27 +    abstract_value param_values[] =
   34.28 +        {
   34.29 +            { .type = &abstract_type_string,
   34.30 +              .u.string_val = self },
   34.31 +            { .type = &abstract_type_int,
   34.32 +              .u.int_val = max }
   34.33 +        };
   34.34 +
   34.35 +    xen_call_(session, "VM.set_memory_dynamic_max_live", param_values, 2, NULL, NULL);
   34.36 +    return session->ok;
   34.37 +}
   34.38 +
   34.39 +
   34.40 +bool
   34.41 +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min)
   34.42 +{
   34.43 +    abstract_value param_values[] =
   34.44 +        {
   34.45 +            { .type = &abstract_type_string,
   34.46 +              .u.string_val = self },
   34.47 +            { .type = &abstract_type_int,
   34.48 +              .u.int_val = min }
   34.49 +        };
   34.50 +
   34.51 +    xen_call_(session, "VM.set_memory_dynamic_min_live", param_values, 2, NULL, NULL);
   34.52 +    return session->ok;
   34.53 +}
   34.54 +
   34.55 +
   34.56 +bool
   34.57  xen_vm_send_sysrq(xen_session *session, xen_vm vm, char *key)
   34.58  {
   34.59      abstract_value param_values[] =
    35.1 --- a/tools/libxen/src/xen_vm_metrics.c	Thu Apr 12 10:26:42 2007 -0600
    35.2 +++ b/tools/libxen/src/xen_vm_metrics.c	Thu Apr 12 10:30:12 2007 -0600
    35.3 @@ -57,6 +57,9 @@ static const struct_member xen_vm_metric
    35.4          { .key = "VCPUs_params",
    35.5            .type = &abstract_type_string_string_map,
    35.6            .offset = offsetof(xen_vm_metrics_record, vcpus_params) },
    35.7 +        { .key = "state",
    35.8 +          .type = &abstract_type_string_set,
    35.9 +          .offset = offsetof(xen_vm_metrics_record, state) },
   35.10          { .key = "start_time",
   35.11            .type = &abstract_type_datetime,
   35.12            .offset = offsetof(xen_vm_metrics_record, start_time) },
   35.13 @@ -87,6 +90,7 @@ xen_vm_metrics_record_free(xen_vm_metric
   35.14      xen_int_float_map_free(record->vcpus_utilisation);
   35.15      xen_int_int_map_free(record->vcpus_cpu);
   35.16      xen_string_string_map_free(record->vcpus_params);
   35.17 +    xen_string_set_free(record->state);
   35.18      free(record);
   35.19  }
   35.20  
   35.21 @@ -215,6 +219,23 @@ xen_vm_metrics_get_vcpus_params(xen_sess
   35.22  
   35.23  
   35.24  bool
   35.25 +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics)
   35.26 +{
   35.27 +    abstract_value param_values[] =
   35.28 +        {
   35.29 +            { .type = &abstract_type_string,
   35.30 +              .u.string_val = vm_metrics }
   35.31 +        };
   35.32 +
   35.33 +    abstract_type result_type = abstract_type_string_set;
   35.34 +
   35.35 +    *result = NULL;
   35.36 +    XEN_CALL_("VM_metrics.get_state");
   35.37 +    return session->ok;
   35.38 +}
   35.39 +
   35.40 +
   35.41 +bool
   35.42  xen_vm_metrics_get_start_time(xen_session *session, time_t *result, xen_vm_metrics vm_metrics)
   35.43  {
   35.44      abstract_value param_values[] =
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/tools/pygrub/src/LiloConf.py	Thu Apr 12 10:30:12 2007 -0600
    36.3 @@ -0,0 +1,147 @@
    36.4 +#
    36.5 +#LiloConf.py
    36.6 +#
    36.7 +
    36.8 +import sys, re, os
    36.9 +import logging
   36.10 +import GrubConf
   36.11 +
   36.12 +class LiloImage(object):
   36.13 +    def __init__(self, lines, path):
   36.14 +        self.reset(lines, path)
   36.15 +
   36.16 +    def __repr__(self):
   36.17 +        return ("title: %s\n"
   36.18 +                "  root: %s\n"
   36.19 +                "  kernel: %s\n"
   36.20 +                "  args: %s\n"
   36.21 +                "  initrd: %s\n" %(self.title, self.root, self.kernel,
   36.22 +                                   self.args, self.initrd))
   36.23 +    def reset(self, lines, path):
   36.24 +        self._root = self._initrd = self._kernel = self._args = None
   36.25 +        self.title = ""
   36.26 +        self.lines = []
   36.27 +        self.path = path
   36.28 +        map(self.set_from_line, lines)
   36.29 +        self.root = "" # dummy
   36.30 +
   36.31 +    def set_from_line(self, line, replace = None):
   36.32 +        (com, arg) = GrubConf.grub_exact_split(line, 2)
   36.33 +
   36.34 +        if self.commands.has_key(com):
   36.35 +            if self.commands[com] is not None:
   36.36 +                exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$', r"\1", arg.strip())))
   36.37 +            else:
   36.38 +                logging.info("Ignored image directive %s" %(com,))
   36.39 +        else:
   36.40 +            logging.warning("Unknown image directive %s" %(com,))
   36.41 +
   36.42 +        # now put the line in the list of lines
   36.43 +        if replace is None:
   36.44 +            self.lines.append(line)
   36.45 +        else:
   36.46 +            self.lines.pop(replace)
   36.47 +            self.lines.insert(replace, line)
   36.48 +
   36.49 +    def set_kernel(self, val):
   36.50 +        self._kernel = (None, self.path + "/" + val)
   36.51 +    def get_kernel(self):
   36.52 +        return self._kernel
   36.53 +    kernel = property(get_kernel, set_kernel)
   36.54 +
   36.55 +    def set_initrd(self, val):
   36.56 +        self._initrd = (None, self.path + "/" + val)
   36.57 +    def get_initrd(self):
   36.58 +        return self._initrd
   36.59 +    initrd = property(get_initrd, set_initrd)
   36.60 +
   36.61 +    # set up command handlers
   36.62 +    commands = { "label": "self.title",
   36.63 +                 "root": "self.root",
   36.64 +                 "rootnoverify": "self.root",
   36.65 +                 "image": "self.kernel",
   36.66 +                 "initrd": "self.initrd",
   36.67 +                 "append": "self.args",
   36.68 +                 "read-only": None,
   36.69 +                 "chainloader": None,
   36.70 +                 "module": None}
   36.71 +
   36.72 +class LiloConfigFile(object):
   36.73 +    def __init__(self, fn = None):
   36.74 +        self.filename = fn
   36.75 +        self.images = []
   36.76 +        self.timeout = -1
   36.77 +        self._default = 0
   36.78 +
   36.79 +        if fn is not None:
   36.80 +            self.parse()
   36.81 +
   36.82 +    def parse(self, buf = None):
   36.83 +        if buf is None:
   36.84 +            if self.filename is None:
   36.85 +                raise ValueError, "No config file defined to parse!"
   36.86 +
   36.87 +            f = open(self.filename, 'r')
   36.88 +            lines = f.readlines()
   36.89 +            f.close()
   36.90 +        else:
   36.91 +            lines = buf.split("\n")
   36.92 +
   36.93 +        path = os.path.dirname(self.filename)
   36.94 +        img = []
   36.95 +        for l in lines:
   36.96 +            l = l.strip()
   36.97 +            # skip blank lines
   36.98 +            if len(l) == 0:
   36.99 +                continue
  36.100 +            # skip comments
  36.101 +            if l.startswith('#'):
  36.102 +                continue
  36.103 +            # new image
  36.104 +            if l.startswith("image"):
  36.105 +                if len(img) > 0:
  36.106 +                    self.add_image(LiloImage(img, path))
  36.107 +                img = [l]
  36.108 +                continue
  36.109 +
  36.110 +            if len(img) > 0:
  36.111 +                img.append(l)
  36.112 +                continue
  36.113 +
  36.114 +            (com, arg) = GrubConf.grub_exact_split(l, 2)
  36.115 +            if self.commands.has_key(com):
  36.116 +                if self.commands[com] is not None:
  36.117 +                    exec("%s = r\"%s\"" %(self.commands[com], arg.strip()))
  36.118 +                else:
  36.119 +                    logging.info("Ignored directive %s" %(com,))
  36.120 +            else:
  36.121 +                logging.warning("Unknown directive %s" %(com,))
  36.122 +
  36.123 +        if len(img) > 0:
  36.124 +            self.add_image(LiloImage(img, path))
  36.125 +
  36.126 +    def add_image(self, image):
  36.127 +        self.images.append(image)
  36.128 +
  36.129 +    def _get_default(self):
  36.130 +        for i in range(0, len(self.images) - 1):
  36.131 +            if self.images[i].title == self._default:
  36.132 +                return i
  36.133 +        return 0
  36.134 +    def _set_default(self, val):
  36.135 +        self._default = val
  36.136 +    default = property(_get_default, _set_default)
  36.137 +
  36.138 +    commands = { "default": "self.default",
  36.139 +                 "timeout": "self.timeout",
  36.140 +                 "prompt": None,
  36.141 +                 "relocatable": None,
  36.142 +                 }
  36.143 +
  36.144 +if __name__ == "__main__":
  36.145 +    if sys.argv < 2:
  36.146 +        raise RuntimeError, "Need a grub.conf to read"
  36.147 +    g = LiloConfigFile(sys.argv[1])
  36.148 +    for i in g.images:
  36.149 +        print i #, i.title, i.root, i.kernel, i.args, i.initrd
  36.150 +    print g.default
    37.1 --- a/tools/pygrub/src/pygrub	Thu Apr 12 10:26:42 2007 -0600
    37.2 +++ b/tools/pygrub/src/pygrub	Thu Apr 12 10:30:12 2007 -0600
    37.3 @@ -16,6 +16,7 @@
    37.4  import os, sys, string, struct, tempfile, re
    37.5  import copy
    37.6  import logging
    37.7 +import platform
    37.8  
    37.9  import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
   37.10  import getopt
   37.11 @@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p
   37.12  
   37.13  import fsimage
   37.14  import grub.GrubConf
   37.15 +import grub.LiloConf
   37.16  
   37.17  PYGRUB_VER = 0.5
   37.18  
   37.19 @@ -59,6 +61,13 @@ def get_active_partition(file):
   37.20          if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',):
   37.21              return buf[poff:poff+16]
   37.22  
   37.23 +        # type=0xee: GUID partition table
   37.24 +        # XXX assume the first partition is active
   37.25 +        if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',):
   37.26 +            os.lseek(fd, 0x400, 0)
   37.27 +            buf = os.read(fd, 512)
   37.28 +            return buf[24:40] # XXX buf[32:40]
   37.29 +
   37.30      # if there's not a partition marked as active, fall back to
   37.31      # the first partition
   37.32      return buf[446:446+16]
   37.33 @@ -346,7 +355,13 @@ class Grub:
   37.34          if not os.access(fn, os.R_OK):
   37.35              raise RuntimeError, "Unable to access %s" %(fn,)
   37.36  
   37.37 -        self.cf = grub.GrubConf.GrubConfigFile()
   37.38 +        if platform.machine() == 'ia64':
   37.39 +            self.cf = grub.LiloConf.LiloConfigFile()
   37.40 +            file_list = ("/efi/redhat/elilo.conf",)
   37.41 +        else:
   37.42 +            self.cf = grub.GrubConf.GrubConfigFile()
   37.43 +            file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
   37.44 +                         "/grub/menu.lst", "/grub/grub.conf")
   37.45  
   37.46          if not fs:
   37.47              # set the config file and parse it
   37.48 @@ -354,18 +369,15 @@ class Grub:
   37.49              self.cf.parse()
   37.50              return
   37.51  
   37.52 -        grubfile = None
   37.53 -        for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
   37.54 -                  "/grub/menu.lst", "/grub/grub.conf"):
   37.55 +        for f in file_list:
   37.56              if fs.file_exists(f):
   37.57 -                grubfile = f
   37.58 +                self.cf.filename = f
   37.59                  break
   37.60 -        if grubfile is None:
   37.61 -            raise RuntimeError, "we couldn't find grub config file in the image provided."
   37.62 -        f = fs.open_file(grubfile)
   37.63 +        if self.cf.filename is None:
   37.64 +            raise RuntimeError, "couldn't find bootloader config file in the image provided."
   37.65 +        f = fs.open_file(self.cf.filename)
   37.66          buf = f.read()
   37.67          del f
   37.68 -        # then parse the grub config
   37.69          self.cf.parse(buf)
   37.70  
   37.71      def run(self):
    38.1 --- a/tools/python/README.XendConfig	Thu Apr 12 10:26:42 2007 -0600
    38.2 +++ b/tools/python/README.XendConfig	Thu Apr 12 10:30:12 2007 -0600
    38.3 @@ -115,6 +115,7 @@ otherConfig
    38.4                                  image.nographic
    38.5                                  image.vnc
    38.6                                  image.sdl
    38.7 +                                image.monitor
    38.8                                  image.vncdisplay
    38.9                                  image.vncunused
   38.10                                  image.hvm.device_model
    39.1 --- a/tools/python/README.sxpcfg	Thu Apr 12 10:26:42 2007 -0600
    39.2 +++ b/tools/python/README.sxpcfg	Thu Apr 12 10:30:12 2007 -0600
    39.3 @@ -63,6 +63,7 @@ image
    39.4    - fdb
    39.5    - soundhw
    39.6    - localtime
    39.7 +  - monitor
    39.8    - serial
    39.9    - stdvga
   39.10    - isa
    40.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Thu Apr 12 10:26:42 2007 -0600
    40.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Thu Apr 12 10:30:12 2007 -0600
    40.3 @@ -75,13 +75,6 @@ def save(fd, dominfo, network, live, dst
    40.4  
    40.5          image_cfg = dominfo.info.get('image', {})
    40.6          hvm = dominfo.info.is_hvm()
    40.7 -        stdvga = 0
    40.8 -
    40.9 -        if hvm:
   40.10 -            log.info("save hvm domain")
   40.11 -            if dominfo.info['platform'].has_key('stdvga'):
   40.12 -                if dominfo.info['platform']['stdvga'] == 1:
   40.13 -                    stdvga = 1
   40.14  
   40.15          # xc_save takes three customization parameters: maxit, max_f, and
   40.16          # flags the last controls whether or not save is 'live', while the
    41.1 --- a/tools/python/xen/xend/XendConfig.py	Thu Apr 12 10:26:42 2007 -0600
    41.2 +++ b/tools/python/xen/xend/XendConfig.py	Thu Apr 12 10:30:12 2007 -0600
    41.3 @@ -117,7 +117,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
    41.4  
    41.5  # Platform configuration keys.
    41.6  XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
    41.7 -                        'fda', 'fdb', 'keymap', 'isa', 'localtime',
    41.8 +                        'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 
    41.9                          'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
   41.10                          'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
   41.11                          'vncconsole', 'vncdisplay', 'vnclisten',
    42.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu Apr 12 10:26:42 2007 -0600
    42.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Thu Apr 12 10:30:12 2007 -0600
    42.3 @@ -1601,7 +1601,6 @@ class XendDomainInfo:
    42.4              self.image = image.create(self, self.info)
    42.5              if self.image:
    42.6                  self.image.createDeviceModel(True)
    42.7 -                self.image.register_shutdown_watch()
    42.8          self._storeDomDetails()
    42.9          self._registerWatches()
   42.10          self.refreshShutdown()
    43.1 --- a/tools/python/xen/xend/XendNode.py	Thu Apr 12 10:26:42 2007 -0600
    43.2 +++ b/tools/python/xen/xend/XendNode.py	Thu Apr 12 10:30:12 2007 -0600
    43.3 @@ -603,7 +603,7 @@ class XendNode:
    43.4          return [[k, info[k]] for k in ITEM_ORDER]
    43.5  
    43.6      def xendinfo(self):
    43.7 -        return [['xend_config_format', 3]]
    43.8 +        return [['xend_config_format', 4]]
    43.9  
   43.10      #
   43.11      # utilisation tracking
    44.1 --- a/tools/python/xen/xend/image.py	Thu Apr 12 10:26:42 2007 -0600
    44.2 +++ b/tools/python/xen/xend/image.py	Thu Apr 12 10:30:12 2007 -0600
    44.3 @@ -284,17 +284,16 @@ class HVMImageHandler(ImageHandler):
    44.4          log.debug("acpi           = %d", self.acpi)
    44.5          log.debug("apic           = %d", self.apic)
    44.6  
    44.7 -        self.register_shutdown_watch()
    44.8 -        self.register_reboot_feature_watch()
    44.9 -
   44.10 -        return xc.hvm_build(domid          = self.vm.getDomid(),
   44.11 -                            image          = self.kernel,
   44.12 -                            store_evtchn   = store_evtchn,
   44.13 -                            memsize        = mem_mb,
   44.14 -                            vcpus          = self.vm.getVCpuCount(),
   44.15 -                            pae            = self.pae,
   44.16 -                            acpi           = self.acpi,
   44.17 -                            apic           = self.apic)
   44.18 +        rc = xc.hvm_build(domid          = self.vm.getDomid(),
   44.19 +                          image          = self.kernel,
   44.20 +                          store_evtchn   = store_evtchn,
   44.21 +                          memsize        = mem_mb,
   44.22 +                          vcpus          = self.vm.getVCpuCount(),
   44.23 +                          pae            = self.pae,
   44.24 +                          acpi           = self.acpi,
   44.25 +                          apic           = self.apic)
   44.26 +        rc['notes'] = { 'SUSPEND_CANCEL': 1 }
   44.27 +        return rc
   44.28  
   44.29      # Return a list of cmd line args to the device models based on the
   44.30      # xm config file
   44.31 @@ -418,6 +417,8 @@ class HVMImageHandler(ImageHandler):
   44.32          else:
   44.33              ret.append('-nographic')
   44.34  
   44.35 +        if int(vmConfig['platform'].get('monitor', 0)) != 0:
   44.36 +            ret.append('-monitor vc')
   44.37          return ret
   44.38  
   44.39      def createDeviceModel(self, restore = False):
   44.40 @@ -448,13 +449,9 @@ class HVMImageHandler(ImageHandler):
   44.41          log.info("device model pid: %d", self.pid)
   44.42  
   44.43      def recreate(self):
   44.44 -        self.register_shutdown_watch()
   44.45 -        self.register_reboot_feature_watch()
   44.46          self.pid = self.vm.gatherDom(('image/device-model-pid', int))
   44.47  
   44.48      def destroy(self, suspend = False):
   44.49 -        self.unregister_shutdown_watch()
   44.50 -        self.unregister_reboot_feature_watch();
   44.51          if self.pid:
   44.52              try:
   44.53                  sig = signal.SIGKILL
   44.54 @@ -473,74 +470,6 @@ class HVMImageHandler(ImageHandler):
   44.55                  pass
   44.56              self.pid = None
   44.57  
   44.58 -    def register_shutdown_watch(self):
   44.59 -        """ add xen store watch on control/shutdown """
   44.60 -        self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown",
   44.61 -                                     self.hvm_shutdown)
   44.62 -        log.debug("hvm shutdown watch registered")
   44.63 -
   44.64 -    def unregister_shutdown_watch(self):
   44.65 -        """Remove the watch on the control/shutdown, if any. Nothrow
   44.66 -        guarantee."""
   44.67 -
   44.68 -        try:
   44.69 -            if self.shutdownWatch:
   44.70 -                self.shutdownWatch.unwatch()
   44.71 -        except:
   44.72 -            log.exception("Unwatching hvm shutdown watch failed.")
   44.73 -        self.shutdownWatch = None
   44.74 -        log.debug("hvm shutdown watch unregistered")
   44.75 -
   44.76 -    def hvm_shutdown(self, _):
   44.77 -        """ watch call back on node control/shutdown,
   44.78 -            if node changed, this function will be called
   44.79 -        """
   44.80 -        xd = xen.xend.XendDomain.instance()
   44.81 -        try:
   44.82 -            vm = xd.domain_lookup( self.vm.getDomid() )
   44.83 -        except XendError:
   44.84 -            # domain isn't registered, no need to clean it up.
   44.85 -            return False
   44.86 -
   44.87 -        reason = vm.getShutdownReason()
   44.88 -        log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
   44.89 -        if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS:
   44.90 -            vm.info['shutdown'] = 1
   44.91 -            vm.info['shutdown_reason'] = \
   44.92 -                REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
   44.93 -            vm.refreshShutdown(vm.info)
   44.94 -
   44.95 -        return True # Keep watching
   44.96 -
   44.97 -    def register_reboot_feature_watch(self):
   44.98 -        """ add xen store watch on control/feature-reboot """
   44.99 -        self.rebootFeatureWatch = xswatch(self.vm.dompath + "/control/feature-reboot", \
  44.100 -                                         self.hvm_reboot_feature)
  44.101 -        log.debug("hvm reboot feature watch registered")
  44.102 -
  44.103 -    def unregister_reboot_feature_watch(self):
  44.104 -        """Remove the watch on the control/feature-reboot, if any. Nothrow
  44.105 -        guarantee."""
  44.106 -
  44.107 -        try:
  44.108 -            if self.rebootFeatureWatch:
  44.109 -                self.rebootFeatureWatch.unwatch()
  44.110 -        except:
  44.111 -            log.exception("Unwatching hvm reboot feature watch failed.")
  44.112 -        self.rebootFeatureWatch = None
  44.113 -        log.debug("hvm reboot feature watch unregistered")
  44.114 -
  44.115 -    def hvm_reboot_feature(self, _):
  44.116 -        """ watch call back on node control/feature-reboot,
  44.117 -            if node changed, this function will be called
  44.118 -        """
  44.119 -        status = self.vm.readDom('control/feature-reboot')
  44.120 -        log.debug("hvm_reboot_feature fired, module status=%s", status)
  44.121 -        if status == '1':
  44.122 -            self.unregister_shutdown_watch()
  44.123 -
  44.124 -        return True # Keep watching
  44.125 -
  44.126  
  44.127  class IA64_HVM_ImageHandler(HVMImageHandler):
  44.128  
    45.1 --- a/tools/python/xen/xend/server/DevController.py	Thu Apr 12 10:26:42 2007 -0600
    45.2 +++ b/tools/python/xen/xend/server/DevController.py	Thu Apr 12 10:30:12 2007 -0600
    45.3 @@ -223,6 +223,7 @@ class DevController:
    45.4                  xstransact.Remove(backpath)
    45.5              xstransact.Remove(frontpath)
    45.6  
    45.7 +        self.vm._removeVm("device/%s/%d" % (self.deviceClass, devid))
    45.8  
    45.9      def configurations(self):
   45.10          return map(self.configuration, self.deviceIDs())
    46.1 --- a/tools/python/xen/xend/server/netif.py	Thu Apr 12 10:26:42 2007 -0600
    46.2 +++ b/tools/python/xen/xend/server/netif.py	Thu Apr 12 10:30:12 2007 -0600
    46.3 @@ -88,46 +88,6 @@ def parseRate(ratestr):
    46.4      return "%lu,%lu" % (bytes_per_interval, interval_usecs)
    46.5  
    46.6  
    46.7 -write_rate_G_re = re.compile('^([0-9]+)000000000(B/s@[0-9]+us)$')
    46.8 -write_rate_M_re = re.compile('^([0-9]+)000000(B/s@[0-9]+us)$')
    46.9 -write_rate_K_re = re.compile('^([0-9]+)000(B/s@[0-9]+us)$')
   46.10 -write_rate_s_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000000us$')
   46.11 -write_rate_m_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000us$')
   46.12 -
   46.13 -def formatRate(rate):
   46.14 -    (bytes_per_interval, interval_usecs) = map(long, rate.split(','))
   46.15 -
   46.16 -    if interval_usecs != 0:
   46.17 -        bytes_per_second = (bytes_per_interval * 1000 * 1000) / interval_usecs
   46.18 -    else:
   46.19 -        bytes_per_second = 0xffffffffL
   46.20 -
   46.21 -    ratestr = "%uB/s@%uus" % (bytes_per_second, interval_usecs)
   46.22 -
   46.23 -    # look for '000's
   46.24 -    m = write_rate_G_re.match(ratestr)
   46.25 -    if m:
   46.26 -        ratestr = m.group(1) + "G" + m.group(2)
   46.27 -    else:
   46.28 -        m = write_rate_M_re.match(ratestr)
   46.29 -        if m:
   46.30 -            ratestr = m.group(1) + "M" + m.group(2)
   46.31 -        else:
   46.32 -            m = write_rate_K_re.match(ratestr)
   46.33 -            if m:
   46.34 -                ratestr = m.group(1) + "K" + m.group(2)
   46.35 -
   46.36 -    m = write_rate_s_re.match(ratestr)
   46.37 -    if m:
   46.38 -        ratestr = m.group(1) + "s"
   46.39 -    else:
   46.40 -        m = write_rate_m_re.match(ratestr)
   46.41 -        if m:
   46.42 -            ratestr = m.group(1) + "ms"
   46.43 -
   46.44 -    return ratestr
   46.45 -
   46.46 -
   46.47  class NetifController(DevController):
   46.48      """Network interface controller. Handles all network devices for a domain.
   46.49      """
   46.50 @@ -138,8 +98,7 @@ class NetifController(DevController):
   46.51      def getDeviceDetails(self, config):
   46.52          """@see DevController.getDeviceDetails"""
   46.53  
   46.54 -        script = os.path.join(xoptions.network_script_dir,
   46.55 -                              config.get('script', xoptions.get_vif_script()))
   46.56 +        script  = config.get('script', xoptions.get_vif_script())
   46.57          typ     = config.get('type')
   46.58          bridge  = config.get('bridge')
   46.59          mac     = config.get('mac')
   46.60 @@ -149,24 +108,17 @@ class NetifController(DevController):
   46.61          ipaddr  = config.get('ip')
   46.62          model   = config.get('model')
   46.63  
   46.64 -        devid = self.allocateDeviceID()
   46.65 -
   46.66          if not typ:
   46.67              typ = xoptions.netback_type
   46.68 -            
   46.69 +
   46.70          if not mac:
   46.71              mac = randomMAC()
   46.72  
   46.73 +        devid = self.allocateDeviceID()
   46.74 +
   46.75          back = { 'script' : script,
   46.76                   'mac'    : mac,
   46.77 -                 'handle' : "%i" % devid,
   46.78                   'type'   : typ }
   46.79 -
   46.80 -        if typ == 'ioemu':
   46.81 -            front = {}
   46.82 -        else:
   46.83 -            front = { 'handle' : "%i" % devid,
   46.84 -                      'mac'    : mac }
   46.85          if ipaddr:
   46.86              back['ip'] = ipaddr
   46.87          if bridge:
   46.88 @@ -174,12 +126,26 @@ class NetifController(DevController):
   46.89          if vifname:
   46.90              back['vifname'] = vifname
   46.91          if rate:
   46.92 -            back['rate'] = parseRate(rate)
   46.93 +            back['rate'] = rate
   46.94          if uuid:
   46.95              back['uuid'] = uuid
   46.96          if model:
   46.97              back['model'] = model
   46.98  
   46.99 +        config_path = "device/%s/%d/" % (self.deviceClass, devid)
  46.100 +        for x in back:
  46.101 +            self.vm._writeVm(config_path + x, back[x])
  46.102 +
  46.103 +        back['handle'] = "%i" % devid
  46.104 +        back['script'] = os.path.join(xoptions.network_script_dir, script)
  46.105 +        if rate:
  46.106 +            back['rate'] = parseRate(rate)
  46.107 +
  46.108 +        front = {}
  46.109 +        if typ != 'ioemu':
  46.110 +            front = { 'handle' : "%i" % devid,
  46.111 +                      'mac'    : mac }
  46.112 +
  46.113          return (devid, back, front)
  46.114  
  46.115  
  46.116 @@ -187,14 +153,17 @@ class NetifController(DevController):
  46.117          """@see DevController.configuration"""
  46.118  
  46.119          result = DevController.getDeviceConfiguration(self, devid)
  46.120 -        devinfo =  self.readBackend(devid, 'script', 'ip', 'bridge',
  46.121 -                                    'mac', 'type', 'vifname', 'rate',
  46.122 -                                    'uuid', 'model')
  46.123 +
  46.124 +        config_path = "device/%s/%d/" % (self.deviceClass, devid)
  46.125 +        devinfo = ()
  46.126 +        for x in ( 'script', 'ip', 'bridge', 'mac',
  46.127 +                   'type', 'vifname', 'rate', 'uuid', 'model' ):
  46.128 +            y = self.vm._readVm(config_path + x)
  46.129 +            devinfo += (y,)
  46.130          (script, ip, bridge, mac, typ, vifname, rate, uuid, model) = devinfo
  46.131  
  46.132          if script:
  46.133 -            network_script_dir = xoptions.network_script_dir + os.sep
  46.134 -            result['script'] = script.replace(network_script_dir, "")
  46.135 +            result['script'] = script
  46.136          if ip:
  46.137              result['ip'] = ip
  46.138          if bridge:
  46.139 @@ -206,11 +175,10 @@ class NetifController(DevController):
  46.140          if vifname:
  46.141              result['vifname'] = vifname
  46.142          if rate:
  46.143 -            result['rate'] = formatRate(rate)
  46.144 +            result['rate'] = rate
  46.145          if uuid:
  46.146              result['uuid'] = uuid
  46.147          if model:
  46.148              result['model'] = model
  46.149              
  46.150          return result
  46.151 -
    47.1 --- a/tools/python/xen/xm/create.dtd	Thu Apr 12 10:26:42 2007 -0600
    47.2 +++ b/tools/python/xen/xm/create.dtd	Thu Apr 12 10:30:12 2007 -0600
    47.3 @@ -95,7 +95,7 @@
    47.4                   src             %URI; #REQUIRED
    47.5                   type            %VDI_TYPE; #REQUIRED
    47.6                   size            CDATA #REQUIRED
    47.7 -                 shareable       CDATA #REQUIRED
    47.8 +                 sharable        CDATA #REQUIRED
    47.9                   read_only       CDATA #REQUIRED>
   47.10  
   47.11  <!ELEMENT name   (label, 
    48.1 --- a/tools/python/xen/xm/create.py	Thu Apr 12 10:26:42 2007 -0600
    48.2 +++ b/tools/python/xen/xm/create.py	Thu Apr 12 10:30:12 2007 -0600
    48.3 @@ -421,6 +421,10 @@ gopts.var('serial', val='FILE',
    48.4            fn=set_value, default='',
    48.5            use="Path to serial or pty or vc")
    48.6  
    48.7 +gopts.var('monitor', val='no|yes',
    48.8 +          fn=set_bool, default=0,
    48.9 +          use="""Should the device model use monitor?""")
   48.10 +
   48.11  gopts.var('localtime', val='no|yes',
   48.12            fn=set_bool, default=0,
   48.13            use="Is RTC set to localtime?")
    49.1 --- a/tools/python/xen/xm/main.py	Thu Apr 12 10:26:42 2007 -0600
    49.2 +++ b/tools/python/xen/xm/main.py	Thu Apr 12 10:30:12 2007 -0600
    49.3 @@ -1544,34 +1544,59 @@ def xm_info(args):
    49.4  
    49.5          host_metrics_record = server.xenapi.host_metrics.get_record(host_record["metrics"])
    49.6  
    49.7 +        def getVal(keys, default=""):
    49.8 +            data = host_record
    49.9 +            for key in keys:
   49.10 +                if key in data:
   49.11 +                    data = data[key]
   49.12 +                else:
   49.13 +                    return default
   49.14 +            return data
   49.15 +
   49.16 +        def getCpuMhz():
   49.17 +            cpu_speeds = [int(host_cpu_record["speed"])
   49.18 +                          for host_cpu_record in host_cpu_records
   49.19 +                          if "speed" in host_cpu_record]
   49.20 +            if len(cpu_speeds) > 0:
   49.21 +                return sum(cpu_speeds) / len(cpu_speeds)
   49.22 +            else:
   49.23 +                return 0
   49.24 +
   49.25 +        getCpuMhz()
   49.26 +
   49.27 +        def getCpuFeatures():
   49.28 +            if len(host_cpu_records) > 0:
   49.29 +                return host_cpu_records[0].get("features", "")
   49.30 +            else:
   49.31 +                return ""
   49.32 +                
   49.33          info = {
   49.34 -            "host":              host_record["name_label"],
   49.35 -            "release":           host_record["software_version"]["release"],
   49.36 -            "version":           host_record["software_version"]["version"],
   49.37 -            "machine":           host_record["software_version"]["machine"],
   49.38 -            "nr_cpus":           len(host_record["host_CPUs"]),
   49.39 -            "nr_nodes":          host_record["cpu_configuration"]["nr_nodes"],
   49.40 -            "sockets_per_node":  host_record["cpu_configuration"]["sockets_per_node"],
   49.41 -            "cores_per_socket":  host_record["cpu_configuration"]["cores_per_socket"],
   49.42 -            "threads_per_core":  host_record["cpu_configuration"]["threads_per_core"],
   49.43 -            "cpu_mhz":           sum([int(host_cpu_record["speed"]) for host_cpu_record in host_cpu_records])
   49.44 -                                   / len(host_cpu_records),
   49.45 -            "hw_caps":           host_cpu_records[0]["features"],
   49.46 +            "host":              getVal(["name_label"]),
   49.47 +            "release":           getVal(["software_version", "release"]),
   49.48 +            "version":           getVal(["software_version", "version"]),
   49.49 +            "machine":           getVal(["software_version", "machine"]),
   49.50 +            "nr_cpus":           len(getVal(["host_CPUs"], [])),
   49.51 +            "nr_nodes":          getVal(["cpu_configuration", "nr_nodes"]),
   49.52 +            "sockets_per_node":  getVal(["cpu_configuration", "sockets_per_node"]),
   49.53 +            "cores_per_socket":  getVal(["cpu_configuration", "cores_per_socket"]),
   49.54 +            "threads_per_core":  getVal(["cpu_configuration", "threads_per_core"]),
   49.55 +            "cpu_mhz":           getCpuMhz(),
   49.56 +            "hw_caps":           getCpuFeatures(),
   49.57              "total_memory":      int(host_metrics_record["memory_total"])/1024/1024,
   49.58              "free_memory":       int(host_metrics_record["memory_free"])/1024/1024,
   49.59 -            "xen_major":         host_record["software_version"]["xen_major"],
   49.60 -            "xen_minor":         host_record["software_version"]["xen_minor"],
   49.61 -            "xen_extra":         host_record["software_version"]["xen_extra"],
   49.62 -            "xen_caps":          " ".join(host_record["capabilities"]),
   49.63 -            "xen_scheduler":     host_record["sched_policy"],
   49.64 -            "xen_pagesize":      host_record["other_config"]["xen_pagesize"],
   49.65 -            "platform_params":   host_record["other_config"]["platform_params"],
   49.66 -            "xen_changeset":     host_record["software_version"]["xen_changeset"],
   49.67 -            "cc_compiler":       host_record["software_version"]["cc_compiler"],
   49.68 -            "cc_compile_by":     host_record["software_version"]["cc_compile_by"],
   49.69 -            "cc_compile_domain": host_record["software_version"]["cc_compile_domain"],
   49.70 -            "cc_compile_date":   host_record["software_version"]["cc_compile_date"],
   49.71 -            "xend_config_format":host_record["software_version"]["xend_config_format"]                                
   49.72 +            "xen_major":         getVal(["software_version", "xen_major"]),
   49.73 +            "xen_minor":         getVal(["software_version", "xen_minor"]),
   49.74 +            "xen_extra":         getVal(["software_version", "xen_extra"]),
   49.75 +            "xen_caps":          " ".join(getVal(["capabilities"], [])),
   49.76 +            "xen_scheduler":     getVal(["sched_policy"]),
   49.77 +            "xen_pagesize":      getVal(["other_config", "xen_pagesize"]),
   49.78 +            "platform_params":   getVal(["other_config", "platform_params"]),
   49.79 +            "xen_changeset":     getVal(["software_version", "xen_changeset"]),
   49.80 +            "cc_compiler":       getVal(["software_version", "cc_compiler"]),
   49.81 +            "cc_compile_by":     getVal(["software_version", "cc_compile_by"]),
   49.82 +            "cc_compile_domain": getVal(["software_version", "cc_compile_domain"]),
   49.83 +            "cc_compile_date":   getVal(["software_version", "cc_compile_date"]),
   49.84 +            "xend_config_format":getVal(["software_version", "xend_config_format"])                                
   49.85          }
   49.86  
   49.87          sorted = info.items()
    50.1 --- a/tools/python/xen/xm/xenapi_create.py	Thu Apr 12 10:26:42 2007 -0600
    50.2 +++ b/tools/python/xen/xm/xenapi_create.py	Thu Apr 12 10:30:12 2007 -0600
    50.3 @@ -212,8 +212,8 @@ class xenapi_create:
    50.4              "SR":               self.DEFAULT_STORAGE_REPOSITORY,  
    50.5              "virtual_size":     vdi.attributes["size"].value,
    50.6              "type":             vdi.attributes["type"].value,
    50.7 -            "shareable":        vdi.attributes["shareable"].value,
    50.8 -            "read_only":        vdi.attributes["read_only"].value,
    50.9 +            "sharable":         bool(vdi.attributes["sharable"].value),
   50.10 +            "read_only":        bool(vdi.attributes["read_only"].value),
   50.11              "other_config":     {"location":
   50.12                  vdi.attributes["src"].value}
   50.13              }
   50.14 @@ -629,10 +629,10 @@ class sxp2xml:
   50.15          vdi.attributes["src"] = src
   50.16          vdi.attributes["read_only"] \
   50.17              = (get_child_by_name(vbd_sxp, "mode") != "w") \
   50.18 -               and "true" or "false"
   50.19 +               and "True" or "False"
   50.20          vdi.attributes["size"] = '-1'
   50.21          vdi.attributes["type"] = "system"
   50.22 -        vdi.attributes["shareable"] = "false"
   50.23 +        vdi.attributes["sharable"] = "False"
   50.24          vdi.attributes["name"] = name
   50.25  
   50.26          vdi.appendChild(self.make_name_tag(name, document))
    51.1 --- a/tools/xcutils/xc_save.c	Thu Apr 12 10:26:42 2007 -0600
    51.2 +++ b/tools/xcutils/xc_save.c	Thu Apr 12 10:30:12 2007 -0600
    51.3 @@ -174,12 +174,9 @@ main(int argc, char **argv)
    51.4      max_f = atoi(argv[4]);
    51.5      flags = atoi(argv[5]);
    51.6  
    51.7 -    if (flags & XCFLAGS_HVM)
    51.8 -        ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
    51.9 -                          &suspend, &init_qemu_maps, &qemu_flip_buffer);
   51.10 -    else 
   51.11 -        ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
   51.12 -                            &suspend);
   51.13 +    ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
   51.14 +                         &suspend, !!(flags & XCFLAGS_HVM),
   51.15 +                         &init_qemu_maps, &qemu_flip_buffer);
   51.16  
   51.17      xc_interface_close(xc_fd);
   51.18  
    52.1 --- a/tools/xm-test/lib/XmTestLib/NetConfig.py	Thu Apr 12 10:26:42 2007 -0600
    52.2 +++ b/tools/xm-test/lib/XmTestLib/NetConfig.py	Thu Apr 12 10:30:12 2007 -0600
    52.3 @@ -44,7 +44,11 @@ def getXendNetConfig():
    52.4      if not xconfig:
    52.5          xconfig = "/etc/xen/xend-config.sxp"
    52.6  
    52.7 -    configfile = open(xconfig, 'r')
    52.8 +    try:
    52.9 +        configfile = open(xconfig, 'r')
   52.10 +    except:
   52.11 +        return "bridge"
   52.12 +    
   52.13      S = configfile.read()
   52.14      pin = Parser()
   52.15      pin.input(S)
    53.1 --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Thu Apr 12 10:26:42 2007 -0600
    53.2 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Thu Apr 12 10:30:12 2007 -0600
    53.3 @@ -2,8 +2,8 @@
    53.4  #define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
    53.5  
    53.6  #include <linux/version.h>
    53.7 -
    53.8  #include <linux/spinlock.h>
    53.9 +#include <asm/maddr.h>
   53.10  
   53.11  #if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
   53.12  #define __always_inline inline
   53.13 @@ -98,8 +98,6 @@ extern char *kasprintf(gfp_t gfp, const 
   53.14  
   53.15  #if defined(_PAGE_PRESENT) && !defined(_PAGE_NX)
   53.16  #define _PAGE_NX 0
   53.17 -#endif
   53.18 -
   53.19  /*
   53.20   * This variable at present is referenced by netfront, but only in code that
   53.21   * is dead when running in hvm guests. To detect potential active uses of it
   53.22 @@ -107,5 +105,6 @@ extern char *kasprintf(gfp_t gfp, const 
   53.23   * mappings created with it will fault when accessed.
   53.24   */
   53.25  #define __supported_pte_mask ((maddr_t)0)
   53.26 +#endif
   53.27  
   53.28  #endif
    54.1 --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Thu Apr 12 10:26:42 2007 -0600
    54.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Thu Apr 12 10:30:12 2007 -0600
    54.3 @@ -28,8 +28,10 @@
    54.4   * IN THE SOFTWARE.
    54.5   */
    54.6  
    54.7 +#include <linux/config.h>
    54.8  #include <linux/module.h>
    54.9  #include <linux/kernel.h>
   54.10 +#include <linux/spinlock.h>
   54.11  #include <xen/evtchn.h>
   54.12  #include <xen/interface/hvm/ioreq.h>
   54.13  #include <xen/features.h>
   54.14 @@ -41,29 +43,37 @@
   54.15  
   54.16  void *shared_info_area;
   54.17  
   54.18 -static DEFINE_MUTEX(irq_evtchn_mutex);
   54.19 -
   54.20  #define is_valid_evtchn(x)	((x) != 0)
   54.21  #define evtchn_from_irq(x)	(irq_evtchn[irq].evtchn)
   54.22  
   54.23  static struct {
   54.24 +	spinlock_t lock;
   54.25  	irqreturn_t(*handler) (int, void *, struct pt_regs *);
   54.26  	void *dev_id;
   54.27  	int evtchn;
   54.28  	int close:1; /* close on unbind_from_irqhandler()? */
   54.29  	int inuse:1;
   54.30 +	int in_handler:1;
   54.31  } irq_evtchn[256];
   54.32  static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
   54.33  	[0 ...  NR_EVENT_CHANNELS-1] = -1 };
   54.34  
   54.35 -static int find_unbound_irq(void)
   54.36 +static DEFINE_SPINLOCK(irq_alloc_lock);
   54.37 +
   54.38 +static int alloc_xen_irq(void)
   54.39  {
   54.40  	static int warned;
   54.41  	int irq;
   54.42  
   54.43 -	for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
   54.44 -		if (!irq_evtchn[irq].inuse)
   54.45 -			return irq;
   54.46 +	spin_lock(&irq_alloc_lock);
   54.47 +
   54.48 +	for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) {
   54.49 +		if (irq_evtchn[irq].inuse) 
   54.50 +			continue;
   54.51 +		irq_evtchn[irq].inuse = 1;
   54.52 +		spin_unlock(&irq_alloc_lock);
   54.53 +		return irq;
   54.54 +	}
   54.55  
   54.56  	if (!warned) {
   54.57  		warned = 1;
   54.58 @@ -71,9 +81,18 @@ static int find_unbound_irq(void)
   54.59  		       "increase irq_evtchn[] size in evtchn.c.\n");
   54.60  	}
   54.61  
   54.62 +	spin_unlock(&irq_alloc_lock);
   54.63 +
   54.64  	return -ENOSPC;
   54.65  }
   54.66  
   54.67 +static void free_xen_irq(int irq)
   54.68 +{
   54.69 +	spin_lock(&irq_alloc_lock);
   54.70 +	irq_evtchn[irq].inuse = 0;
   54.71 +	spin_unlock(&irq_alloc_lock);
   54.72 +}
   54.73 +
   54.74  int irq_to_evtchn_port(int irq)
   54.75  {
   54.76  	return irq_evtchn[irq].evtchn;
   54.77 @@ -93,8 +112,7 @@ void unmask_evtchn(int port)
   54.78  	shared_info_t *s = shared_info_area;
   54.79  	vcpu_info_t *vcpu_info;
   54.80  
   54.81 -	preempt_disable();
   54.82 -	cpu = smp_processor_id();
   54.83 +	cpu = get_cpu();
   54.84  	vcpu_info = &s->vcpu_info[cpu];
   54.85  
   54.86  	/* Slow path (hypercall) if this is a non-local port.  We only
   54.87 @@ -103,7 +121,7 @@ void unmask_evtchn(int port)
   54.88  		evtchn_unmask_t op = { .port = port };
   54.89  		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask,
   54.90  						  &op);
   54.91 -		preempt_enable();
   54.92 +		put_cpu();
   54.93  		return;
   54.94  	}
   54.95  
   54.96 @@ -121,7 +139,8 @@ void unmask_evtchn(int port)
   54.97  		if (!vcpu_info->evtchn_upcall_mask)
   54.98  			force_evtchn_callback();
   54.99  	}
  54.100 -	preempt_enable();
  54.101 +
  54.102 +	put_cpu();
  54.103  }
  54.104  EXPORT_SYMBOL(unmask_evtchn);
  54.105  
  54.106 @@ -135,20 +154,19 @@ int bind_listening_port_to_irqhandler(
  54.107  	struct evtchn_alloc_unbound alloc_unbound;
  54.108  	int err, irq;
  54.109  
  54.110 -	mutex_lock(&irq_evtchn_mutex);
  54.111 +	irq = alloc_xen_irq();
  54.112 +	if (irq < 0)
  54.113 +		return irq;
  54.114  
  54.115 -	irq = find_unbound_irq();
  54.116 -	if (irq < 0) {
  54.117 -		mutex_unlock(&irq_evtchn_mutex);
  54.118 -		return irq;
  54.119 -	}
  54.120 +	spin_lock_irq(&irq_evtchn[irq].lock);
  54.121  
  54.122  	alloc_unbound.dom        = DOMID_SELF;
  54.123  	alloc_unbound.remote_dom = remote_domain;
  54.124  	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
  54.125  					  &alloc_unbound);
  54.126  	if (err) {
  54.127 -		mutex_unlock(&irq_evtchn_mutex);
  54.128 +		spin_unlock_irq(&irq_evtchn[irq].lock);
  54.129 +		free_xen_irq(irq);
  54.130  		return err;
  54.131  	}
  54.132  
  54.133 @@ -156,13 +174,13 @@ int bind_listening_port_to_irqhandler(
  54.134  	irq_evtchn[irq].dev_id  = dev_id;
  54.135  	irq_evtchn[irq].evtchn  = alloc_unbound.port;
  54.136  	irq_evtchn[irq].close   = 1;
  54.137 -	irq_evtchn[irq].inuse   = 1;
  54.138  
  54.139  	evtchn_to_irq[alloc_unbound.port] = irq;
  54.140  
  54.141  	unmask_evtchn(alloc_unbound.port);
  54.142  
  54.143 -	mutex_unlock(&irq_evtchn_mutex);
  54.144 +	spin_unlock_irq(&irq_evtchn[irq].lock);
  54.145 +
  54.146  	return irq;
  54.147  }
  54.148  EXPORT_SYMBOL(bind_listening_port_to_irqhandler);
  54.149 @@ -176,34 +194,34 @@ int bind_caller_port_to_irqhandler(
  54.150  {
  54.151  	int irq;
  54.152  
  54.153 -	mutex_lock(&irq_evtchn_mutex);
  54.154 +	irq = alloc_xen_irq();
  54.155 +	if (irq < 0)
  54.156 +		return irq;
  54.157  
  54.158 -	irq = find_unbound_irq();
  54.159 -	if (irq < 0) {
  54.160 -		mutex_unlock(&irq_evtchn_mutex);
  54.161 -		return irq;
  54.162 -	}
  54.163 +	spin_lock_irq(&irq_evtchn[irq].lock);
  54.164  
  54.165  	irq_evtchn[irq].handler = handler;
  54.166  	irq_evtchn[irq].dev_id  = dev_id;
  54.167  	irq_evtchn[irq].evtchn  = caller_port;
  54.168  	irq_evtchn[irq].close   = 0;
  54.169 -	irq_evtchn[irq].inuse   = 1;
  54.170  
  54.171  	evtchn_to_irq[caller_port] = irq;
  54.172  
  54.173  	unmask_evtchn(caller_port);
  54.174  
  54.175 -	mutex_unlock(&irq_evtchn_mutex);
  54.176 +	spin_unlock_irq(&irq_evtchn[irq].lock);
  54.177 +
  54.178  	return irq;
  54.179  }
  54.180  EXPORT_SYMBOL(bind_caller_port_to_irqhandler);
  54.181  
  54.182  void unbind_from_irqhandler(unsigned int irq, void *dev_id)
  54.183  {
  54.184 -	int evtchn = evtchn_from_irq(irq);
  54.185 +	int evtchn;
  54.186  
  54.187 -	mutex_lock(&irq_evtchn_mutex);
  54.188 +	spin_lock_irq(&irq_evtchn[irq].lock);
  54.189 +
  54.190 +	evtchn = evtchn_from_irq(irq);
  54.191  
  54.192  	if (is_valid_evtchn(evtchn)) {
  54.193  		evtchn_to_irq[irq] = -1;
  54.194 @@ -216,21 +234,28 @@ void unbind_from_irqhandler(unsigned int
  54.195  
  54.196  	irq_evtchn[irq].handler = NULL;
  54.197  	irq_evtchn[irq].evtchn  = 0;
  54.198 -	irq_evtchn[irq].inuse   = 0;
  54.199 +
  54.200 +	spin_unlock_irq(&irq_evtchn[irq].lock);
  54.201  
  54.202 -	mutex_unlock(&irq_evtchn_mutex);
  54.203 +	while (irq_evtchn[irq].in_handler)
  54.204 +		cpu_relax();
  54.205 +
  54.206 +	free_xen_irq(irq);
  54.207  }
  54.208  EXPORT_SYMBOL(unbind_from_irqhandler);
  54.209  
  54.210  void notify_remote_via_irq(int irq)
  54.211  {
  54.212 -	int evtchn = evtchn_from_irq(irq);
  54.213 +	int evtchn;
  54.214 +
  54.215 +	evtchn = evtchn_from_irq(irq);
  54.216  	if (is_valid_evtchn(evtchn))
  54.217  		notify_remote_via_evtchn(evtchn);
  54.218  }
  54.219  EXPORT_SYMBOL(notify_remote_via_irq);
  54.220  
  54.221 -irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  54.222 +static irqreturn_t evtchn_interrupt(int irq, void *dev_id,
  54.223 +				    struct pt_regs *regs)
  54.224  {
  54.225  	unsigned int l1i, port;
  54.226  	/* XXX: All events are bound to vcpu0 but irq may be redirected. */
  54.227 @@ -249,13 +274,30 @@ irqreturn_t evtchn_interrupt(int irq, vo
  54.228  		while ((l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i])) {
  54.229  			port = (l1i * BITS_PER_LONG) + __ffs(l2);
  54.230  			synch_clear_bit(port, &s->evtchn_pending[0]);
  54.231 +
  54.232  			irq = evtchn_to_irq[port];
  54.233 -			if ((irq >= 0) &&
  54.234 -			    ((handler = irq_evtchn[irq].handler) != NULL))
  54.235 -				handler(irq, irq_evtchn[irq].dev_id, regs);
  54.236 -			else
  54.237 -				printk(KERN_WARNING "unexpected event channel "
  54.238 -				       "upcall on port %d!\n", port);
  54.239 +			if (irq < 0)
  54.240 +				continue;
  54.241 +
  54.242 +			spin_lock(&irq_evtchn[irq].lock);
  54.243 +			handler = irq_evtchn[irq].handler;
  54.244 +			dev_id  = irq_evtchn[irq].dev_id;
  54.245 +			if (unlikely(handler == NULL)) {
  54.246 +				printk("Xen IRQ%d (port %d) has no handler!\n",
  54.247 +				       irq, port);
  54.248 +				spin_unlock(&irq_evtchn[irq].lock);
  54.249 +				continue;
  54.250 +			}
  54.251 +			irq_evtchn[irq].in_handler = 1;
  54.252 +			spin_unlock(&irq_evtchn[irq].lock);
  54.253 +
  54.254 +			local_irq_enable();
  54.255 +			handler(irq, irq_evtchn[irq].dev_id, regs);
  54.256 +			local_irq_disable();
  54.257 +
  54.258 +			spin_lock(&irq_evtchn[irq].lock);
  54.259 +			irq_evtchn[irq].in_handler = 0;
  54.260 +			spin_unlock(&irq_evtchn[irq].lock);
  54.261  		}
  54.262  	}
  54.263  
  54.264 @@ -268,16 +310,6 @@ void force_evtchn_callback(void)
  54.265  }
  54.266  EXPORT_SYMBOL(force_evtchn_callback);
  54.267  
  54.268 -void irq_suspend(void)
  54.269 -{
  54.270 -	mutex_lock(&irq_evtchn_mutex);
  54.271 -}
  54.272 -
  54.273 -void irq_suspend_cancel(void)
  54.274 -{
  54.275 -	mutex_unlock(&irq_evtchn_mutex);
  54.276 -}
  54.277 -
  54.278  void irq_resume(void)
  54.279  {
  54.280  	int evtchn, irq;
  54.281 @@ -289,6 +321,16 @@ void irq_resume(void)
  54.282  
  54.283  	for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
  54.284  		irq_evtchn[irq].evtchn = 0;
  54.285 +}
  54.286  
  54.287 -	mutex_unlock(&irq_evtchn_mutex);
  54.288 +int xen_irq_init(struct pci_dev *pdev)
  54.289 +{
  54.290 +	int irq;
  54.291 +
  54.292 +	for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
  54.293 +		spin_lock_init(&irq_evtchn[irq].lock);
  54.294 +
  54.295 +	return request_irq(pdev->irq, evtchn_interrupt,
  54.296 +			   SA_SHIRQ | SA_SAMPLE_RANDOM | SA_INTERRUPT,
  54.297 +			   "xen-platform-pci", pdev);
  54.298  }
    55.1 --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Thu Apr 12 10:26:42 2007 -0600
    55.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Thu Apr 12 10:30:12 2007 -0600
    55.3 @@ -1,24 +1,101 @@
    55.4  #include <linux/config.h>
    55.5 +#include <linux/stop_machine.h>
    55.6 +#include <xen/evtchn.h>
    55.7 +#include <xen/gnttab.h>
    55.8  #include <xen/xenbus.h>
    55.9  #include "platform-pci.h"
   55.10  #include <asm/hypervisor.h>
   55.11  
   55.12 -int __xen_suspend(int fast_suspend)
   55.13 +struct ap_suspend_info {
   55.14 +	int      do_spin;
   55.15 +	atomic_t nr_spinning;
   55.16 +};
   55.17 +
   55.18 +/*
   55.19 + * Spinning prevents, for example, APs touching grant table entries while
   55.20 + * the shared grant table is not mapped into the address space immediately
   55.21 + * after resume.
   55.22 + */
   55.23 +static void ap_suspend(void *_info)
   55.24 +{
   55.25 +	struct ap_suspend_info *info = _info;
   55.26 +
   55.27 +	BUG_ON(!irqs_disabled());
   55.28 +
   55.29 +	atomic_inc(&info->nr_spinning);
   55.30 +	mb();
   55.31 +
   55.32 +	while (info->do_spin) {
   55.33 +		cpu_relax();
   55.34 +		HYPERVISOR_yield();
   55.35 +	}
   55.36 +
   55.37 +	mb();
   55.38 +	atomic_dec(&info->nr_spinning);
   55.39 +}
   55.40 +
   55.41 +static int bp_suspend(void)
   55.42  {
   55.43  	int suspend_cancelled;
   55.44  
   55.45 -	xenbus_suspend();
   55.46 -	platform_pci_suspend();
   55.47 +	BUG_ON(!irqs_disabled());
   55.48  
   55.49  	suspend_cancelled = HYPERVISOR_shutdown(SHUTDOWN_suspend);
   55.50  
   55.51 -	if (suspend_cancelled) {
   55.52 -		platform_pci_suspend_cancel();
   55.53 +	if (!suspend_cancelled) {
   55.54 +		platform_pci_resume();
   55.55 +		gnttab_resume();
   55.56 +		irq_resume();
   55.57 +	}
   55.58 +
   55.59 +	return suspend_cancelled;
   55.60 +}
   55.61 +
   55.62 +int __xen_suspend(int fast_suspend)
   55.63 +{
   55.64 +	int err, suspend_cancelled, nr_cpus;
   55.65 +	struct ap_suspend_info info;
   55.66 +
   55.67 +	xenbus_suspend();
   55.68 +
   55.69 +	preempt_disable();
   55.70 +
   55.71 +	/* Prevent any races with evtchn_interrupt() handler. */
   55.72 +	disable_irq(xen_platform_pdev->irq);
   55.73 +
   55.74 +	info.do_spin = 1;
   55.75 +	atomic_set(&info.nr_spinning, 0);
   55.76 +	smp_mb();
   55.77 +
   55.78 +	nr_cpus = num_online_cpus() - 1;
   55.79 +
   55.80 +	err = smp_call_function(ap_suspend, &info, 0, 0);
   55.81 +	if (err < 0) {
   55.82 +		preempt_enable();
   55.83  		xenbus_suspend_cancel();
   55.84 -	} else {
   55.85 -		platform_pci_resume();
   55.86 +		return err;
   55.87 +	}
   55.88 +
   55.89 +	while (atomic_read(&info.nr_spinning) != nr_cpus)
   55.90 +		cpu_relax();
   55.91 +
   55.92 +	local_irq_disable();
   55.93 +	suspend_cancelled = bp_suspend();
   55.94 +	local_irq_enable();
   55.95 +
   55.96 +	smp_mb();
   55.97 +	info.do_spin = 0;
   55.98 +	while (atomic_read(&info.nr_spinning) != 0)
   55.99 +		cpu_relax();
  55.100 +
  55.101 +	enable_irq(xen_platform_pdev->irq);
  55.102 +
  55.103 +	preempt_enable();
  55.104 +
  55.105 +	if (!suspend_cancelled)
  55.106  		xenbus_resume();
  55.107 -	}
  55.108 +	else
  55.109 +		xenbus_suspend_cancel();
  55.110  
  55.111  	return 0;
  55.112  }
    56.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c	Thu Apr 12 10:26:42 2007 -0600
    56.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c	Thu Apr 12 10:30:12 2007 -0600
    56.3 @@ -12,11 +12,10 @@ static int system_state = 1;
    56.4  EXPORT_SYMBOL(system_state);
    56.5  #endif
    56.6  
    56.7 -static inline void ctrl_alt_del(void)
    56.8 +void ctrl_alt_del(void)
    56.9  {
   56.10  	kill_proc(1, SIGINT, 1); /* interrupt init */
   56.11  }
   56.12 -EXPORT_SYMBOL(ctrl_alt_del);
   56.13  
   56.14  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
   56.15  size_t strcspn(const char *s, const char *reject)
    57.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Thu Apr 12 10:26:42 2007 -0600
    57.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Thu Apr 12 10:30:12 2007 -0600
    57.3 @@ -40,7 +40,6 @@
    57.4  #include <xen/interface/hvm/params.h>
    57.5  #include <xen/features.h>
    57.6  #include <xen/evtchn.h>
    57.7 -#include <xen/gnttab.h>
    57.8  #ifdef __ia64__
    57.9  #include <asm/xen/xencomm.h>
   57.10  #endif
   57.11 @@ -62,6 +61,8 @@ MODULE_AUTHOR("ssmith@xensource.com");
   57.12  MODULE_DESCRIPTION("Xen platform PCI device");
   57.13  MODULE_LICENSE("GPL");
   57.14  
   57.15 +struct pci_dev *xen_platform_pdev;
   57.16 +
   57.17  static unsigned long shared_info_frame;
   57.18  static uint64_t callback_via;
   57.19  
   57.20 @@ -89,8 +90,6 @@ static int __devinit init_xen_info(void)
   57.21  	if (shared_info_area == NULL)
   57.22  		panic("can't map shared info\n");
   57.23  
   57.24 -	gnttab_init();
   57.25 -
   57.26  	return 0;
   57.27  }
   57.28  
   57.29 @@ -199,8 +198,10 @@ static int set_callback_via(uint64_t via
   57.30  	return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
   57.31  }
   57.32  
   57.33 +int xen_irq_init(struct pci_dev *pdev);
   57.34  int xenbus_init(void);
   57.35  int xen_reboot_init(void);
   57.36 +int gnttab_init(void);
   57.37  
   57.38  static int __devinit platform_pci_init(struct pci_dev *pdev,
   57.39  				       const struct pci_device_id *ent)
   57.40 @@ -209,6 +210,10 @@ static int __devinit platform_pci_init(s
   57.41  	long ioaddr, iolen;
   57.42  	long mmio_addr, mmio_len;
   57.43  
   57.44 +	if (xen_platform_pdev)
   57.45 +		return -EBUSY;
   57.46 +	xen_platform_pdev = pdev;
   57.47 +
   57.48  	i = pci_enable_device(pdev);
   57.49  	if (i)
   57.50  		return i;
   57.51 @@ -249,9 +254,10 @@ static int __devinit platform_pci_init(s
   57.52  	if ((ret = init_xen_info()))
   57.53  		goto out;
   57.54  
   57.55 -	if ((ret = request_irq(pdev->irq, evtchn_interrupt,
   57.56 -			       SA_SHIRQ | SA_SAMPLE_RANDOM,
   57.57 -			       "xen-platform-pci", pdev)))
   57.58 +	if ((ret = gnttab_init()))
   57.59 +		goto out;
   57.60 +
   57.61 +	if ((ret = xen_irq_init(pdev)))
   57.62  		goto out;
   57.63  
   57.64  	if ((ret = set_callback_via(callback_via)))
   57.65 @@ -292,18 +298,6 @@ static struct pci_driver platform_driver
   57.66  
   57.67  static int pci_device_registered;
   57.68  
   57.69 -void platform_pci_suspend(void)
   57.70 -{
   57.71 -	gnttab_suspend();
   57.72 -	irq_suspend();
   57.73 -}
   57.74 -
   57.75 -void platform_pci_suspend_cancel(void)
   57.76 -{
   57.77 -	irq_suspend_cancel();
   57.78 -	gnttab_resume();
   57.79 -}
   57.80 -
   57.81  void platform_pci_resume(void)
   57.82  {
   57.83  	struct xen_add_to_physmap xatp;
   57.84 @@ -319,12 +313,8 @@ void platform_pci_resume(void)
   57.85  	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
   57.86  		BUG();
   57.87  
   57.88 -	irq_resume();
   57.89 -
   57.90  	if (set_callback_via(callback_via))
   57.91  		printk("platform_pci_resume failure!\n");
   57.92 -
   57.93 -	gnttab_resume();
   57.94  }
   57.95  
   57.96  static int __init platform_pci_module_init(void)
    58.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h	Thu Apr 12 10:26:42 2007 -0600
    58.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h	Thu Apr 12 10:30:12 2007 -0600
    58.3 @@ -22,16 +22,11 @@
    58.4  #ifndef _XEN_PLATFORM_PCI_H
    58.5  #define _XEN_PLATFORM_PCI_H
    58.6  
    58.7 -#include <linux/interrupt.h>
    58.8 +#include <linux/pci.h>
    58.9  
   58.10  unsigned long alloc_xen_mmio(unsigned long len);
   58.11 -int gnttab_init(void);
   58.12 -irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs);
   58.13 -void irq_suspend(void);
   58.14 -void irq_suspend_cancel(void);
   58.15 -
   58.16 -void platform_pci_suspend(void);
   58.17 -void platform_pci_suspend_cancel(void);
   58.18  void platform_pci_resume(void);
   58.19  
   58.20 +extern struct pci_dev *xen_platform_pdev;
   58.21 +
   58.22  #endif /* _XEN_PLATFORM_PCI_H */