ia64/xen-unstable

changeset 14842:f92a79e39da8

Merge with xenppc-unstable-merge.hg
author kfraser@localhost.localdomain
date Fri Apr 13 11:14:26 2007 +0100 (2007-04-13)
parents 5bda20f0723d fdbbc6aa2cbf
children 52d1022c431a
files tools/libxc/xc_hvm_save.c tools/libxc/xc_linux_save.c
line diff
     1.1 --- a/README	Thu Apr 12 16:37:32 2007 -0500
     1.2 +++ b/README	Fri Apr 13 11:14:26 2007 +0100
     1.3 @@ -199,3 +199,7 @@ Xend (the Xen daemon) has the following 
     1.4      * For optional PAM support, PyPAM:
     1.5            URL:    http://www.pangalactic.org/PyPAM/
     1.6            Debian: python-pam
     1.7 +
     1.8 +    * For optional XenAPI support in XM, PyXML:
     1.9 +          URL:    http://pyxml.sourceforge.net
    1.10 +          YUM:    PyXML
     2.1 --- a/docs/xen-api/xenapi-datamodel.tex	Thu Apr 12 16:37:32 2007 -0500
     2.2 +++ b/docs/xen-api/xenapi-datamodel.tex	Fri Apr 13 11:14:26 2007 +0100
     2.3 @@ -1558,6 +1558,111 @@ void
     2.4  \vspace{0.3cm}
     2.5  \vspace{0.3cm}
     2.6  \vspace{0.3cm}
     2.7 +\subsubsection{RPC name:~add\_to\_VCPUs\_params\_live}
     2.8 +
     2.9 +{\bf Overview:} 
    2.10 +Add the given key-value pair to VM.VCPUs\_params, and apply that value on
    2.11 +the running VM.
    2.12 +
    2.13 + \noindent {\bf Signature:} 
    2.14 +\begin{verbatim} void add_to_VCPUs_params_live (session_id s, VM ref self, string key, string value)\end{verbatim}
    2.15 +
    2.16 +
    2.17 +\noindent{\bf Arguments:}
    2.18 +
    2.19 + 
    2.20 +\vspace{0.3cm}
    2.21 +\begin{tabular}{|c|c|p{7cm}|}
    2.22 + \hline
    2.23 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    2.24 +{\tt VM ref } & self & The VM \\ \hline 
    2.25 +
    2.26 +{\tt string } & key & The key \\ \hline 
    2.27 +
    2.28 +{\tt string } & value & The value \\ \hline 
    2.29 +
    2.30 +\end{tabular}
    2.31 +
    2.32 +\vspace{0.3cm}
    2.33 +
    2.34 + \noindent {\bf Return Type:} 
    2.35 +{\tt 
    2.36 +void
    2.37 +}
    2.38 +
    2.39 +
    2.40 +
    2.41 +\vspace{0.3cm}
    2.42 +\vspace{0.3cm}
    2.43 +\vspace{0.3cm}
    2.44 +\subsubsection{RPC name:~set\_memory\_dynamic\_max\_live}
    2.45 +
    2.46 +{\bf Overview:} 
    2.47 +Set memory\_dynamic\_max in database and on running VM.
    2.48 +
    2.49 + \noindent {\bf Signature:} 
    2.50 +\begin{verbatim} void set_memory_dynamic_max_live (session_id s, VM ref self, int max)\end{verbatim}
    2.51 +
    2.52 +
    2.53 +\noindent{\bf Arguments:}
    2.54 +
    2.55 + 
    2.56 +\vspace{0.3cm}
    2.57 +\begin{tabular}{|c|c|p{7cm}|}
    2.58 + \hline
    2.59 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    2.60 +{\tt VM ref } & self & The VM \\ \hline 
    2.61 +
    2.62 +{\tt int } & max & The memory\_dynamic\_max value \\ \hline 
    2.63 +
    2.64 +\end{tabular}
    2.65 +
    2.66 +\vspace{0.3cm}
    2.67 +
    2.68 + \noindent {\bf Return Type:} 
    2.69 +{\tt 
    2.70 +void
    2.71 +}
    2.72 +
    2.73 +
    2.74 +
    2.75 +\vspace{0.3cm}
    2.76 +\vspace{0.3cm}
    2.77 +\vspace{0.3cm}
    2.78 +\subsubsection{RPC name:~set\_memory\_dynamic\_min\_live}
    2.79 +
    2.80 +{\bf Overview:} 
    2.81 +Set memory\_dynamic\_min in database and on running VM.
    2.82 +
    2.83 + \noindent {\bf Signature:} 
    2.84 +\begin{verbatim} void set_memory_dynamic_min_live (session_id s, VM ref self, int min)\end{verbatim}
    2.85 +
    2.86 +
    2.87 +\noindent{\bf Arguments:}
    2.88 +
    2.89 + 
    2.90 +\vspace{0.3cm}
    2.91 +\begin{tabular}{|c|c|p{7cm}|}
    2.92 + \hline
    2.93 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    2.94 +{\tt VM ref } & self & The VM \\ \hline 
    2.95 +
    2.96 +{\tt int } & min & The memory\_dynamic\_min value \\ \hline 
    2.97 +
    2.98 +\end{tabular}
    2.99 +
   2.100 +\vspace{0.3cm}
   2.101 +
   2.102 + \noindent {\bf Return Type:} 
   2.103 +{\tt 
   2.104 +void
   2.105 +}
   2.106 +
   2.107 +
   2.108 +
   2.109 +\vspace{0.3cm}
   2.110 +\vspace{0.3cm}
   2.111 +\vspace{0.3cm}
   2.112  \subsubsection{RPC name:~send\_sysrq}
   2.113  
   2.114  {\bf Overview:} 
   2.115 @@ -4184,6 +4289,7 @@ Quals & Field & Type & Description \\
   2.116  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/utilisation} & (int $\rightarrow$ float) Map & Utilisation for all of guest's current VCPUs \\
   2.117  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/CPU} & (int $\rightarrow$ int) Map & VCPU to PCPU map \\
   2.118  $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/params} & (string $\rightarrow$ string) Map & The live equivalent to VM.VCPUs\_params \\
   2.119 +$\mathit{RO}_\mathit{run}$ &  {\tt state} & string Set & The state of the guest, e.g.\ blocked, dying, etc. \\
   2.120  $\mathit{RO}_\mathit{run}$ &  {\tt start\_time} & datetime & Time at which this VM was last booted \\
   2.121  $\mathit{RO}_\mathit{run}$ &  {\tt last\_updated} & datetime & Time at which this information was last updated \\
   2.122  \hline
   2.123 @@ -4402,6 +4508,38 @@ value of the field
   2.124  \vspace{0.3cm}
   2.125  \vspace{0.3cm}
   2.126  \vspace{0.3cm}
   2.127 +\subsubsection{RPC name:~get\_state}
   2.128 +
   2.129 +{\bf Overview:} 
   2.130 +Get the state field of the given VM\_metrics.
   2.131 +
   2.132 + \noindent {\bf Signature:} 
   2.133 +\begin{verbatim} (string Set) get_state (session_id s, VM_metrics ref self)\end{verbatim}
   2.134 +
   2.135 +
   2.136 +\noindent{\bf Arguments:}
   2.137 +
   2.138 + 
   2.139 +\vspace{0.3cm}
   2.140 +\begin{tabular}{|c|c|p{7cm}|}
   2.141 + \hline
   2.142 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   2.143 +{\tt VM\_metrics ref } & self & reference to the object \\ \hline 
   2.144 +
   2.145 +\end{tabular}
   2.146 +
   2.147 +\vspace{0.3cm}
   2.148 +
   2.149 + \noindent {\bf Return Type:} 
   2.150 +{\tt 
   2.151 +string Set
   2.152 +}
   2.153 +
   2.154 +
   2.155 +value of the field
   2.156 +\vspace{0.3cm}
   2.157 +\vspace{0.3cm}
   2.158 +\vspace{0.3cm}
   2.159  \subsubsection{RPC name:~get\_start\_time}
   2.160  
   2.161  {\bf Overview:} 
   2.162 @@ -6601,7 +6739,8 @@ Quals & Field & Type & Description \\
   2.163  $\mathit{RO}_\mathit{run}$ &  {\tt speed} & int & the speed of the physical CPU \\
   2.164  $\mathit{RO}_\mathit{run}$ &  {\tt modelname} & string & the model name of the physical CPU \\
   2.165  $\mathit{RO}_\mathit{run}$ &  {\tt stepping} & string & the stepping of the physical CPU \\
   2.166 -$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU \\
   2.167 +$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU (a decoded version of the features field) \\
   2.168 +$\mathit{RO}_\mathit{run}$ &  {\tt features} & string & the physical CPU feature bitmap \\
   2.169  $\mathit{RO}_\mathit{run}$ &  {\tt utilisation} & float & the current CPU utilisation \\
   2.170  \hline
   2.171  \end{longtable}
   2.172 @@ -6883,6 +7022,38 @@ value of the field
   2.173  \vspace{0.3cm}
   2.174  \vspace{0.3cm}
   2.175  \vspace{0.3cm}
   2.176 +\subsubsection{RPC name:~get\_features}
   2.177 +
   2.178 +{\bf Overview:} 
   2.179 +Get the features field of the given host\_cpu.
   2.180 +
   2.181 + \noindent {\bf Signature:} 
   2.182 +\begin{verbatim} string get_features (session_id s, host_cpu ref self)\end{verbatim}
   2.183 +
   2.184 +
   2.185 +\noindent{\bf Arguments:}
   2.186 +
   2.187 + 
   2.188 +\vspace{0.3cm}
   2.189 +\begin{tabular}{|c|c|p{7cm}|}
   2.190 + \hline
   2.191 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   2.192 +{\tt host\_cpu ref } & self & reference to the object \\ \hline 
   2.193 +
   2.194 +\end{tabular}
   2.195 +
   2.196 +\vspace{0.3cm}
   2.197 +
   2.198 + \noindent {\bf Return Type:} 
   2.199 +{\tt 
   2.200 +string
   2.201 +}
   2.202 +
   2.203 +
   2.204 +value of the field
   2.205 +\vspace{0.3cm}
   2.206 +\vspace{0.3cm}
   2.207 +\vspace{0.3cm}
   2.208  \subsubsection{RPC name:~get\_utilisation}
   2.209  
   2.210  {\bf Overview:} 
     3.1 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c	Thu Apr 12 16:37:32 2007 -0500
     3.2 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c	Fri Apr 13 11:14:26 2007 +0100
     3.3 @@ -290,5 +290,7 @@ void foo(void)
     3.4  	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
     3.5  	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
     3.6  	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
     3.7 +	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
     3.8 +	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);    
     3.9  #endif /* CONFIG_XEN */
    3.10  }
     4.1 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c	Thu Apr 12 16:37:32 2007 -0500
     4.2 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c	Fri Apr 13 11:14:26 2007 +0100
     4.3 @@ -594,6 +594,10 @@ setup_arch (char **cmdline_p)
     4.4  
     4.5  
     4.6  	/* enable IA-64 Machine Check Abort Handling unless disabled */
     4.7 +#ifdef CONFIG_XEN
     4.8 +	if (is_running_on_xen() && !is_initial_xendomain())
     4.9 +		nomca = 1;
    4.10 +#endif
    4.11  	if (!nomca)
    4.12  		ia64_mca_init();
    4.13  
     5.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Thu Apr 12 16:37:32 2007 -0500
     5.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Fri Apr 13 11:14:26 2007 +0100
     5.3 @@ -852,6 +852,9 @@ time_resume(void)
     5.4  
     5.5  	/* Just trigger a tick.  */
     5.6  	ia64_cpu_local_tick();
     5.7 +
     5.8 +	/* Time interpolator remembers the last timer status.  Forget it */
     5.9 +	time_interpolator_reset();
    5.10  }
    5.11  
    5.12  ///////////////////////////////////////////////////////////////////////////
     6.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c	Thu Apr 12 16:37:32 2007 -0500
     6.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c	Fri Apr 13 11:14:26 2007 +0100
     6.3 @@ -418,3 +418,39 @@ xencomm_mini_hypercall_perfmon_op(unsign
     6.4  	return xencomm_arch_hypercall_perfmon_op(cmd, desc, count);
     6.5  }
     6.6  EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op);
     6.7 +
     6.8 +int
     6.9 +xencomm_mini_hypercall_sched_op(int cmd, void *arg)
    6.10 +{
    6.11 +	int rc, nbr_area = 2;
    6.12 +	struct xencomm_mini xc_area[2];
    6.13 +	struct xencomm_handle *desc;
    6.14 +	unsigned int argsize;
    6.15 +
    6.16 +	switch (cmd) {
    6.17 +	case SCHEDOP_yield:
    6.18 +	case SCHEDOP_block:
    6.19 +		argsize = 0;
    6.20 +		break;
    6.21 +	case SCHEDOP_shutdown:
    6.22 +		argsize = sizeof(sched_shutdown_t);
    6.23 +		break;
    6.24 +	case SCHEDOP_poll:
    6.25 +		argsize = sizeof(sched_poll_t);
    6.26 +		break;
    6.27 +	case SCHEDOP_remote_shutdown:
    6.28 +		argsize = sizeof(sched_remote_shutdown_t);
    6.29 +		break;
    6.30 +
    6.31 +	default:
    6.32 +		printk("%s: unknown sched op %d\n", __func__, cmd);
    6.33 +		return -ENOSYS;
    6.34 +	}
    6.35 +
    6.36 +	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
    6.37 +	if (rc)
    6.38 +		return rc;
    6.39 +
    6.40 +	return xencomm_arch_hypercall_sched_op(cmd, desc);
    6.41 +}
    6.42 +EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op);
     7.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Thu Apr 12 16:37:32 2007 -0500
     7.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S	Fri Apr 13 11:14:26 2007 +0100
     7.3 @@ -614,6 +614,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
     7.4  #ifdef CONFIG_XEN
     7.5  	;;
     7.6  	// r16-r31 all now hold bank1 values
     7.7 +	mov r15=ar.unat
     7.8  	movl r2=XSI_BANK1_R16
     7.9  	movl r3=XSI_BANK1_R16+8
    7.10  	;;
    7.11 @@ -641,6 +642,11 @@ GLOBAL_ENTRY(ia64_leave_kernel)
    7.12  .mem.offset 0,0; st8.spill [r2]=r30,16
    7.13  .mem.offset 8,0; st8.spill [r3]=r31,16
    7.14  	;;
    7.15 +	mov r3=ar.unat
    7.16 +	movl r2=XSI_B1NAT
    7.17 +	;;
    7.18 +	st8 [r2]=r3
    7.19 +	mov ar.unat=r15
    7.20  	movl r2=XSI_BANKNUM;;
    7.21  	st4 [r2]=r0;
    7.22  #else
     8.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Thu Apr 12 16:37:32 2007 -0500
     8.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S	Fri Apr 13 11:14:26 2007 +0100
     8.3 @@ -2013,33 +2013,6 @@ END(ia32_interrupt)
     8.4  	DBG_FAULT(66)
     8.5  	FAULT(66)
     8.6  
     8.7 -#ifdef CONFIG_XEN
     8.8 -	/*
     8.9 -	 * There is no particular reason for this code to be here, other than that
    8.10 -	 * there happens to be space here that would go unused otherwise.  If this
    8.11 -	 * fault ever gets "unreserved", simply moved the following code to a more
    8.12 -	 * suitable spot...
    8.13 -	 */
    8.14 -
    8.15 -GLOBAL_ENTRY(xen_bsw1)
    8.16 -	/* FIXME: THIS CODE IS NOT NaT SAFE! */
    8.17 -	movl r30=XSI_BANKNUM;
    8.18 -	mov r31=1;;
    8.19 -	st4 [r30]=r31;
    8.20 -	movl r30=XSI_BANK1_R16;
    8.21 -	movl r31=XSI_BANK1_R16+8;;
    8.22 -	ld8 r16=[r30],16; ld8 r17=[r31],16;;
    8.23 -	ld8 r18=[r30],16; ld8 r19=[r31],16;;
    8.24 -	ld8 r20=[r30],16; ld8 r21=[r31],16;;
    8.25 -	ld8 r22=[r30],16; ld8 r23=[r31],16;;
    8.26 -	ld8 r24=[r30],16; ld8 r25=[r31],16;;
    8.27 -	ld8 r26=[r30],16; ld8 r27=[r31],16;;
    8.28 -	ld8 r28=[r30],16; ld8 r29=[r31],16;;
    8.29 -	ld8 r30=[r30]; ld8 r31=[r31];;
    8.30 -	br.ret.sptk.many b0
    8.31 -END(xen_bsw1)
    8.32 -#endif
    8.33 -
    8.34  	.org ia64_ivt+0x7f00
    8.35  /////////////////////////////////////////////////////////////////////////////////////////
    8.36  // 0x7f00 Entry 67 (size 16 bundles) Reserved
    8.37 @@ -2167,4 +2140,38 @@ 1:
    8.38  	(p6) br.spnt.few 1b	// call evtchn_do_upcall again.
    8.39  	br.sptk.many ia64_leave_kernel   
    8.40  END(xen_event_callback)
    8.41 +
    8.42 +
    8.43 +	/*
    8.44 +	 * There is no particular reason for this code to be here, other than that
    8.45 +	 * there happens to be space here that would go unused otherwise.  If this
    8.46 +	 * fault ever gets "unreserved", simply move the following code to a more
    8.47 +	 * suitable spot...
    8.48 +	 */
    8.49 +
    8.50 +GLOBAL_ENTRY(xen_bsw1)
    8.51 +	/* FIXME: THIS CODE IS NOT NaT SAFE! */
    8.52 +	mov r14=ar.unat
    8.53 +	movl r30=XSI_B1NAT
    8.54 +	;;
    8.55 +	ld8 r30=[r30];;
    8.56 +	mov ar.unat=r30
    8.57 +	movl r30=XSI_BANKNUM;
    8.58 +	mov r31=1;;
    8.59 +	st4 [r30]=r31;
    8.60 +	movl r30=XSI_BANK1_R16;
    8.61 +	movl r31=XSI_BANK1_R16+8;;
    8.62 +	ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;;
    8.63 +	ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;;
    8.64 +	ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;;
    8.65 +	ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;;
    8.66 +	ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;;
    8.67 +	ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;;
    8.68 +	ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;;
    8.69 +	ld8.fill r30=[r30]; ld8.fill r31=[r31];;
    8.70 +	mov ar.unat=r14
    8.71 +	br.ret.sptk.many b0
    8.72 +END(xen_bsw1)
    8.73 +
    8.74 +   
    8.75  #endif
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Apr 12 16:37:32 2007 -0500
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Fri Apr 13 11:14:26 2007 +0100
     9.3 @@ -118,6 +118,7 @@ static void shutdown_handler(struct xenb
     9.4  	err = xenbus_transaction_start(&xbt);
     9.5  	if (err)
     9.6  		return;
     9.7 +
     9.8  	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
     9.9  	/* Ignore read errors and empty reads. */
    9.10  	if (XENBUS_IS_ERR_READ(str)) {
    9.11 @@ -206,14 +207,12 @@ static int setup_shutdown_watcher(void)
    9.12  		printk(KERN_ERR "Failed to set shutdown watcher\n");
    9.13  		return err;
    9.14  	}
    9.15 -	xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
    9.16  
    9.17  	err = register_xenbus_watch(&sysrq_watch);
    9.18  	if (err) {
    9.19  		printk(KERN_ERR "Failed to set sysrq watcher\n");
    9.20  		return err;
    9.21  	}
    9.22 -	xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
    9.23  
    9.24  	return 0;
    9.25  }
    10.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu Apr 12 16:37:32 2007 -0500
    10.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Fri Apr 13 11:14:26 2007 +0100
    10.3 @@ -210,7 +210,7 @@ extern unsigned long pg0[];
    10.4  
    10.5  /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
    10.6  #define pmd_none(x)	(!(unsigned long)pmd_val(x))
    10.7 -#ifdef CONFIG_XEN_COMPAT_030002
    10.8 +#if CONFIG_XEN_COMPAT <= 0x030002
    10.9  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
   10.10     can temporarily clear it. */
   10.11  #define pmd_present(x)	(pmd_val(x))
    11.1 --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Thu Apr 12 16:37:32 2007 -0500
    11.2 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Fri Apr 13 11:14:26 2007 +0100
    11.3 @@ -64,7 +64,6 @@ extern start_info_t *xen_start_info;
    11.4  
    11.5  void force_evtchn_callback(void);
    11.6  
    11.7 -#ifndef CONFIG_VMX_GUEST
    11.8  /* Turn jiffies into Xen system time. XXX Implement me. */
    11.9  #define jiffies_to_st(j)	0
   11.10  
   11.11 @@ -116,6 +115,7 @@ HYPERVISOR_poll(
   11.12  	return rc;
   11.13  }
   11.14  
   11.15 +#ifndef CONFIG_VMX_GUEST
   11.16  // for drivers/xen/privcmd/privcmd.c
   11.17  #define machine_to_phys_mapping 0
   11.18  struct vm_area_struct;
    12.1 --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h	Thu Apr 12 16:37:32 2007 -0500
    12.2 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h	Fri Apr 13 11:14:26 2007 +0100
    12.3 @@ -57,6 +57,7 @@
    12.4  #define XSI_PSR_IC		(XSI_BASE + XSI_PSR_IC_OFS)
    12.5  #define XSI_IPSR		(XSI_BASE + XSI_IPSR_OFS)
    12.6  #define XSI_IIP			(XSI_BASE + XSI_IIP_OFS)
    12.7 +#define XSI_B1NAT		(XSI_BASE + XSI_B1NATS_OFS)
    12.8  #define XSI_BANK1_R16		(XSI_BASE + XSI_BANK1_R16_OFS)
    12.9  #define XSI_BANKNUM		(XSI_BASE + XSI_BANKNUM_OFS)
   12.10  #define XSI_IHA			(XSI_BASE + XSI_IHA_OFS)
    13.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Apr 12 16:37:32 2007 -0500
    13.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Fri Apr 13 11:14:26 2007 +0100
    13.3 @@ -411,7 +411,7 @@ static inline int pmd_large(pmd_t pte) {
    13.4  #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
    13.5                                    pmd_index(address))
    13.6  #define pmd_none(x)	(!pmd_val(x))
    13.7 -#ifdef CONFIG_XEN_COMPAT_030002
    13.8 +#if CONFIG_XEN_COMPAT <= 0x030002
    13.9  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
   13.10     can temporarily clear it. */
   13.11  #define pmd_present(x)	(pmd_val(x))
    14.1 --- a/tools/examples/network-bridge	Thu Apr 12 16:37:32 2007 -0500
    14.2 +++ b/tools/examples/network-bridge	Fri Apr 13 11:14:26 2007 +0100
    14.3 @@ -183,12 +183,12 @@ op_start () {
    14.4  	return
    14.5      fi
    14.6  
    14.7 -    if ! link_exists "$vdev"; then
    14.8 -        if link_exists "$pdev"; then
    14.9 -            # The device is already up.
   14.10 -            return
   14.11 -        else
   14.12 -            echo "
   14.13 +    if link_exists "$pdev"; then
   14.14 +	# The device is already up.
   14.15 +	return
   14.16 +    fi
   14.17 +    if link_exists veth0 && ! link_exists "$vdev"; then
   14.18 +	echo "
   14.19  Link $vdev is missing.
   14.20  This may be because you have reached the limit of the number of interfaces
   14.21  that the loopback driver supports.  If the loopback driver is a module, you
   14.22 @@ -196,8 +196,7 @@ may raise this limit by passing it as a 
   14.23  driver is compiled statically into the kernel, then you may set the parameter
   14.24  using netloop.nloopbacks=<N> on the domain 0 kernel command line.
   14.25  " >&2
   14.26 -            exit 1
   14.27 -        fi
   14.28 +	exit 1
   14.29      fi
   14.30  
   14.31      create_bridge ${bridge}
   14.32 @@ -224,9 +223,13 @@ using netloop.nloopbacks=<N> on the doma
   14.33  	add_to_bridge2 ${bridge} ${pdev}
   14.34  	do_ifup ${netdev}
   14.35      else
   14.36 +	ip link set ${bridge} arp on
   14.37 +	ip link set ${bridge} multicast on
   14.38  	# old style without ${vdev}
   14.39  	transfer_addrs  ${netdev} ${bridge}
   14.40  	transfer_routes ${netdev} ${bridge}
   14.41 +	# Attach the real interface to the bridge.
   14.42 +	add_to_bridge ${bridge} ${netdev}
   14.43      fi
   14.44  
   14.45      if [ ${antispoof} = 'yes' ] ; then
    15.1 --- a/tools/examples/xmexample.hvm	Thu Apr 12 16:37:32 2007 -0500
    15.2 +++ b/tools/examples/xmexample.hvm	Fri Apr 13 11:14:26 2007 +0100
    15.3 @@ -170,6 +170,12 @@ serial='pty'
    15.4  
    15.5  
    15.6  #-----------------------------------------------------------------------------
    15.7 +#   Qemu Monitor, default is disabled
    15.8 +#   Use ctrl-alt-2 to connect
    15.9 +#monitor=1
   15.10 +
   15.11 +
   15.12 +#-----------------------------------------------------------------------------
   15.13  #   enable sound card support, [sb16|es1370|all|..,..], default none
   15.14  #soundhw='sb16'
   15.15  
    16.1 --- a/tools/examples/xmexample.vti	Thu Apr 12 16:37:32 2007 -0500
    16.2 +++ b/tools/examples/xmexample.vti	Fri Apr 13 11:14:26 2007 +0100
    16.3 @@ -113,6 +113,11 @@ stdvga=0
    16.4  serial='pty'
    16.5  
    16.6  #-----------------------------------------------------------------------------
    16.7 +#   Qemu Monitor, default is disabled
    16.8 +#   Use ctrl-alt-2 to connect
    16.9 +#monitor=1
   16.10 +
   16.11 +#-----------------------------------------------------------------------------
   16.12  #   enable sound card support, [sb16|es1370|all|..,..], default none
   16.13  #soundhw='sb16'
   16.14  
    17.1 --- a/tools/ioemu/hw/pc.c	Thu Apr 12 16:37:32 2007 -0500
    17.2 +++ b/tools/ioemu/hw/pc.c	Fri Apr 13 11:14:26 2007 +0100
    17.3 @@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size, 
    17.4      if (pci_enabled && acpi_enabled) {
    17.5          piix4_pm_init(pci_bus, piix3_devfn + 3);
    17.6      }
    17.7 -#endif /* !CONFIG_DM */
    17.8  
    17.9  #if 0
   17.10      /* ??? Need to figure out some way for the user to
   17.11 @@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size, 
   17.12          lsi_scsi_attach(scsi, bdrv, -1);
   17.13      }
   17.14  #endif
   17.15 +#else
   17.16 +    if (pci_enabled) {
   17.17 +        void *scsi;
   17.18 +
   17.19 +        scsi = lsi_scsi_init(pci_bus, -1);
   17.20 +        for (i = 0; i < MAX_SCSI_DISKS ; i++) {
   17.21 +            if (bs_table[i + MAX_DISKS]) 
   17.22 +                lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1);
   17.23 +        }
   17.24 +    }
   17.25 +#endif /* !CONFIG_DM */
   17.26      /* must be done after all PCI devices are instanciated */
   17.27      /* XXX: should be done in the Bochs BIOS */
   17.28      if (pci_enabled) {
    18.1 --- a/tools/ioemu/monitor.c	Thu Apr 12 16:37:32 2007 -0500
    18.2 +++ b/tools/ioemu/monitor.c	Fri Apr 13 11:14:26 2007 +0100
    18.3 @@ -180,7 +180,7 @@ static void do_commit(void)
    18.4  {
    18.5      int i;
    18.6  
    18.7 -    for (i = 0; i < MAX_DISKS; i++) {
    18.8 +    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
    18.9          if (bs_table[i]) {
   18.10              bdrv_commit(bs_table[i]);
   18.11          }
    19.1 --- a/tools/ioemu/vl.c	Thu Apr 12 16:37:32 2007 -0500
    19.2 +++ b/tools/ioemu/vl.c	Fri Apr 13 11:14:26 2007 +0100
    19.3 @@ -116,7 +116,7 @@ char phys_ram_file[1024];
    19.4  void *ioport_opaque[MAX_IOPORTS];
    19.5  IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
    19.6  IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
    19.7 -BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD];
    19.8 +BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS], *fd_table[MAX_FD];
    19.9  int vga_ram_size;
   19.10  int bios_size;
   19.11  static DisplayState display_state;
   19.12 @@ -1396,7 +1396,7 @@ static void stdio_received_byte(int ch)
   19.13          case 's': 
   19.14              {
   19.15                  int i;
   19.16 -                for (i = 0; i < MAX_DISKS; i++) {
   19.17 +                for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   19.18                      if (bs_table[i])
   19.19                          bdrv_commit(bs_table[i]);
   19.20                  }
   19.21 @@ -6057,7 +6057,7 @@ int main(int argc, char **argv)
   19.22      int snapshot, linux_boot;
   19.23      const char *initrd_filename;
   19.24  #ifndef CONFIG_DM
   19.25 -    const char *hd_filename[MAX_DISKS];
   19.26 +    const char *hd_filename[MAX_DISKS + MAX_SCSI_DISKS];
   19.27  #endif /* !CONFIG_DM */
   19.28      const char *fd_filename[MAX_FD];
   19.29      const char *kernel_filename, *kernel_cmdline;
   19.30 @@ -6126,7 +6126,7 @@ int main(int argc, char **argv)
   19.31      for(i = 0; i < MAX_FD; i++)
   19.32          fd_filename[i] = NULL;
   19.33  #ifndef CONFIG_DM
   19.34 -    for(i = 0; i < MAX_DISKS; i++)
   19.35 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
   19.36          hd_filename[i] = NULL;
   19.37  #endif /* !CONFIG_DM */
   19.38      ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
   19.39 @@ -6724,7 +6724,7 @@ int main(int argc, char **argv)
   19.40      }
   19.41  
   19.42      /* open the virtual block devices */
   19.43 -    for(i = 0; i < MAX_DISKS; i++) {
   19.44 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   19.45          if (hd_filename[i]) {
   19.46              if (!bs_table[i]) {
   19.47                  char buf[64];
    20.1 --- a/tools/ioemu/vl.h	Thu Apr 12 16:37:32 2007 -0500
    20.2 +++ b/tools/ioemu/vl.h	Fri Apr 13 11:14:26 2007 +0100
    20.3 @@ -818,8 +818,9 @@ int vnc_start_viewer(int port);
    20.4  
    20.5  /* ide.c */
    20.6  #define MAX_DISKS 4
    20.7 +#define MAX_SCSI_DISKS 7
    20.8  
    20.9 -extern BlockDriverState *bs_table[MAX_DISKS];
   20.10 +extern BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS];
   20.11  
   20.12  void isa_ide_init(int iobase, int iobase2, int irq,
   20.13                    BlockDriverState *hd0, BlockDriverState *hd1);
    21.1 --- a/tools/ioemu/xenstore.c	Thu Apr 12 16:37:32 2007 -0500
    21.2 +++ b/tools/ioemu/xenstore.c	Fri Apr 13 11:14:26 2007 +0100
    21.3 @@ -18,7 +18,7 @@
    21.4  #include <fcntl.h>
    21.5  
    21.6  static struct xs_handle *xsh = NULL;
    21.7 -static char *media_filename[MAX_DISKS];
    21.8 +static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS];
    21.9  static QEMUTimer *insert_timer = NULL;
   21.10  
   21.11  #define UWAIT_MAX (30*1000000) /* thirty seconds */
   21.12 @@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c
   21.13      int ret = 0;
   21.14  
   21.15      if (*buf)
   21.16 -	free(*buf);
   21.17 +        free(*buf);
   21.18      va_start(ap, fmt);
   21.19      if (vasprintf(buf, fmt, ap) == -1) {
   21.20 -	buf = NULL;
   21.21 -	ret = -1;
   21.22 +        buf = NULL;
   21.23 +        ret = -1;
   21.24      }
   21.25      va_end(ap);
   21.26      return ret;
   21.27 @@ -44,12 +44,12 @@ static void insert_media(void *opaque)
   21.28  {
   21.29      int i;
   21.30  
   21.31 -    for (i = 0; i < MAX_DISKS; i++) {
   21.32 -	if (media_filename[i] && bs_table[i]) {
   21.33 -	    do_change(bs_table[i]->device_name, media_filename[i]);
   21.34 -	    free(media_filename[i]);
   21.35 -	    media_filename[i] = NULL;
   21.36 -	}
   21.37 +    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
   21.38 +        if (media_filename[i] && bs_table[i]) {
   21.39 +            do_change(bs_table[i]->device_name, media_filename[i]);
   21.40 +            free(media_filename[i]);
   21.41 +            media_filename[i] = NULL;
   21.42 +        }
   21.43      }
   21.44  }
   21.45  
   21.46 @@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in
   21.47  {
   21.48  
   21.49      if (insert_timer == NULL)
   21.50 -	insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
   21.51 +        insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
   21.52      qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout);
   21.53  }
   21.54  
   21.55 @@ -82,17 +82,17 @@ void xenstore_parse_domain_config(int do
   21.56      char **e = NULL;
   21.57      char *buf = NULL, *path;
   21.58      char *fpath = NULL, *bpath = NULL,
   21.59 -         *dev = NULL, *params = NULL, *type = NULL;
   21.60 -    int i;
   21.61 +        *dev = NULL, *params = NULL, *type = NULL;
   21.62 +    int i, is_scsi;
   21.63      unsigned int len, num, hd_index;
   21.64  
   21.65 -    for(i = 0; i < MAX_DISKS; i++)
   21.66 +    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
   21.67          media_filename[i] = NULL;
   21.68  
   21.69      xsh = xs_daemon_open();
   21.70      if (xsh == NULL) {
   21.71 -	fprintf(logfile, "Could not contact xenstore for domain config\n");
   21.72 -	return;
   21.73 +        fprintf(logfile, "Could not contact xenstore for domain config\n");
   21.74 +        return;
   21.75      }
   21.76  
   21.77      path = xs_get_domain_path(xsh, domid);
   21.78 @@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do
   21.79      }
   21.80  
   21.81      if (pasprintf(&buf, "%s/device/vbd", path) == -1)
   21.82 -	goto out;
   21.83 +        goto out;
   21.84  
   21.85      e = xs_directory(xsh, XBT_NULL, buf, &num);
   21.86      if (e == NULL)
   21.87 -	goto out;
   21.88 +        goto out;
   21.89  
   21.90      for (i = 0; i < num; i++) {
   21.91 -	/* read the backend path */
   21.92 -	if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
   21.93 -	    continue;
   21.94 -	free(bpath);
   21.95 +        /* read the backend path */
   21.96 +        if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
   21.97 +            continue;
   21.98 +        free(bpath);
   21.99          bpath = xs_read(xsh, XBT_NULL, buf, &len);
  21.100 -	if (bpath == NULL)
  21.101 -	    continue;
  21.102 -	/* read the name of the device */
  21.103 -	if (pasprintf(&buf, "%s/dev", bpath) == -1)
  21.104 -	    continue;
  21.105 -	free(dev);
  21.106 -	dev = xs_read(xsh, XBT_NULL, buf, &len);
  21.107 -	if (dev == NULL)
  21.108 -	    continue;
  21.109 -	if (strncmp(dev, "hd", 2) || strlen(dev) != 3)
  21.110 -	    continue;
  21.111 -	hd_index = dev[2] - 'a';
  21.112 -	if (hd_index >= MAX_DISKS)
  21.113 -	    continue;
  21.114 -	/* read the type of the device */
  21.115 -	if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
  21.116 -	    continue;
  21.117 -	free(type);
  21.118 -	type = xs_read(xsh, XBT_NULL, buf, &len);
  21.119 -	if (pasprintf(&buf, "%s/params", bpath) == -1)
  21.120 -	    continue;
  21.121 -	free(params);
  21.122 -	params = xs_read(xsh, XBT_NULL, buf, &len);
  21.123 -	if (params == NULL)
  21.124 -	    continue;
  21.125 +        if (bpath == NULL)
  21.126 +            continue;
  21.127 +        /* read the name of the device */
  21.128 +        if (pasprintf(&buf, "%s/dev", bpath) == -1)
  21.129 +            continue;
  21.130 +        free(dev);
  21.131 +        dev = xs_read(xsh, XBT_NULL, buf, &len);
  21.132 +        if (dev == NULL)
  21.133 +            continue;
  21.134 +        is_scsi = !strncmp(dev, "sd", 2);
  21.135 +        if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 )
  21.136 +            continue;
  21.137 +        hd_index = dev[2] - 'a';
  21.138 +        if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS))
  21.139 +            continue;
  21.140 +        /* read the type of the device */
  21.141 +        if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
  21.142 +            continue;
  21.143 +        free(type);
  21.144 +        type = xs_read(xsh, XBT_NULL, buf, &len);
  21.145 +        if (pasprintf(&buf, "%s/params", bpath) == -1)
  21.146 +            continue;
  21.147 +        free(params);
  21.148 +        params = xs_read(xsh, XBT_NULL, buf, &len);
  21.149 +        if (params == NULL)
  21.150 +            continue;
  21.151          /* 
  21.152           * check if device has a phantom vbd; the phantom is hooked
  21.153           * to the frontend device (for ease of cleanup), so lookup 
  21.154           * the frontend device, and see if there is a phantom_vbd
  21.155           * if there is, we will use resolution as the filename
  21.156           */
  21.157 -	if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
  21.158 -	    continue;
  21.159 -	free(fpath);
  21.160 +        if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
  21.161 +            continue;
  21.162 +        free(fpath);
  21.163          fpath = xs_read(xsh, XBT_NULL, buf, &len);
  21.164 -	if (fpath) {
  21.165 -	    if (pasprintf(&buf, "%s/dev", fpath) == -1)
  21.166 -	        continue;
  21.167 -	    free(params);
  21.168 +        if (fpath) {
  21.169 +            if (pasprintf(&buf, "%s/dev", fpath) == -1)
  21.170 +                continue;
  21.171 +            free(params);
  21.172              params = xs_read(xsh, XBT_NULL, buf , &len);
  21.173 -	    if (params) {
  21.174 +            if (params) {
  21.175                  /* 
  21.176                   * wait for device, on timeout silently fail because we will 
  21.177                   * fail to open below
  21.178 @@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do
  21.179              }
  21.180          }
  21.181  
  21.182 -	bs_table[hd_index] = bdrv_new(dev);
  21.183 -	/* check if it is a cdrom */
  21.184 -	if (type && !strcmp(type, "cdrom")) {
  21.185 -	    bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
  21.186 -	    if (pasprintf(&buf, "%s/params", bpath) != -1)
  21.187 -		xs_watch(xsh, buf, dev);
  21.188 -	}
  21.189 -	/* open device now if media present */
  21.190 -	if (params[0]) {
  21.191 -            if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0)
  21.192 +        bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev);
  21.193 +        /* check if it is a cdrom */
  21.194 +        if (type && !strcmp(type, "cdrom")) {
  21.195 +            bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
  21.196 +            if (pasprintf(&buf, "%s/params", bpath) != -1)
  21.197 +                xs_watch(xsh, buf, dev);
  21.198 +        }
  21.199 +        /* open device now if media present */
  21.200 +        if (params[0]) {
  21.201 +            if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)],
  21.202 +                          params, 0 /* snapshot */) < 0)
  21.203                  fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
  21.204                          params);
  21.205 -	}
  21.206 +        }
  21.207      }
  21.208  
  21.209      /* Set a watch for log-dirty requests from the migration tools */
  21.210 @@ -199,7 +201,7 @@ void xenstore_parse_domain_config(int do
  21.211  int xenstore_fd(void)
  21.212  {
  21.213      if (xsh)
  21.214 -	return xs_fileno(xsh);
  21.215 +        return xs_fileno(xsh);
  21.216      return -1;
  21.217  }
  21.218  
  21.219 @@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque
  21.220  
  21.221      vec = xs_read_watch(xsh, &num);
  21.222      if (!vec)
  21.223 -	return;
  21.224 +        return;
  21.225  
  21.226      if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) {
  21.227          xenstore_process_logdirty_event();
  21.228 @@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque
  21.229      }
  21.230  
  21.231      if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
  21.232 -	strlen(vec[XS_WATCH_TOKEN]) != 3)
  21.233 -	goto out;
  21.234 +        strlen(vec[XS_WATCH_TOKEN]) != 3)
  21.235 +        goto out;
  21.236      hd_index = vec[XS_WATCH_TOKEN][2] - 'a';
  21.237      image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
  21.238      if (image == NULL || !strcmp(image, bs_table[hd_index]->filename))
  21.239 -	goto out;		/* gone or identical */
  21.240 +        goto out;  /* gone or identical */
  21.241  
  21.242      do_eject(0, vec[XS_WATCH_TOKEN]);
  21.243      bs_table[hd_index]->filename[0] = 0;
  21.244      if (media_filename[hd_index]) {
  21.245 -	free(media_filename[hd_index]);
  21.246 -	media_filename[hd_index] = NULL;
  21.247 +        free(media_filename[hd_index]);
  21.248 +        media_filename[hd_index] = NULL;
  21.249      }
  21.250  
  21.251      if (image[0]) {
  21.252 -	media_filename[hd_index] = strdup(image);
  21.253 -	xenstore_check_new_media_present(5000);
  21.254 +        media_filename[hd_index] = strdup(image);
  21.255 +        xenstore_check_new_media_present(5000);
  21.256      }
  21.257  
  21.258   out:
  21.259 @@ -354,7 +356,7 @@ void xenstore_write_vncport(int display)
  21.260      char *portstr = NULL;
  21.261  
  21.262      if (xsh == NULL)
  21.263 -	return;
  21.264 +        return;
  21.265  
  21.266      path = xs_get_domain_path(xsh, domid);
  21.267      if (path == NULL) {
  21.268 @@ -363,10 +365,10 @@ void xenstore_write_vncport(int display)
  21.269      }
  21.270  
  21.271      if (pasprintf(&buf, "%s/console/vnc-port", path) == -1)
  21.272 -	goto out;
  21.273 +        goto out;
  21.274  
  21.275      if (pasprintf(&portstr, "%d", 5900 + display) == -1)
  21.276 -	goto out;
  21.277 +        goto out;
  21.278  
  21.279      if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0)
  21.280          fprintf(logfile, "xs_write() vncport failed\n");
  21.281 @@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid)
  21.282      unsigned int i, len, rc = 0;
  21.283  
  21.284      if (xsh == NULL) {
  21.285 -	return -1;
  21.286 +        return -1;
  21.287      }
  21.288  
  21.289      path = xs_get_domain_path(xsh, domid);
  21.290      if (path == NULL) {
  21.291 -	fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
  21.292 -	return -1;
  21.293 +        fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
  21.294 +        return -1;
  21.295      }
  21.296  
  21.297      pasprintf(&buf, "%s/vm", path);
  21.298      uuid = xs_read(xsh, XBT_NULL, buf, &len);
  21.299      if (uuid == NULL) {
  21.300 -	fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
  21.301 -	free(path);
  21.302 -	return -1;
  21.303 +        fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
  21.304 +        free(path);
  21.305 +        return -1;
  21.306      }
  21.307  
  21.308      pasprintf(&buf, "%s/vncpasswd", uuid);
  21.309      passwd = xs_read(xsh, XBT_NULL, buf, &len);
  21.310      if (passwd == NULL) {
  21.311 -	fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
  21.312 -	free(uuid);
  21.313 -	free(path);
  21.314 -	return rc;
  21.315 +        fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
  21.316 +        free(uuid);
  21.317 +        free(path);
  21.318 +        return rc;
  21.319      }
  21.320  
  21.321      for (i=0; i<len && i<63; i++) {
  21.322 -	vncpasswd[i] = passwd[i];
  21.323 -	passwd[i] = '\0';
  21.324 +        vncpasswd[i] = passwd[i];
  21.325 +        passwd[i] = '\0';
  21.326      }
  21.327      vncpasswd[len] = '\0';
  21.328      pasprintf(&buf, "%s/vncpasswd", uuid);
  21.329      if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) {
  21.330 -	fprintf(logfile, "xs_write() vncpasswd failed.\n");
  21.331 -	rc = -1;
  21.332 +        fprintf(logfile, "xs_write() vncpasswd failed.\n");
  21.333 +        rc = -1;
  21.334      }
  21.335  
  21.336      free(passwd);
  21.337 @@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc
  21.338          goto out;
  21.339  
  21.340      if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1)
  21.341 -	goto out;
  21.342 +        goto out;
  21.343  
  21.344      e = xs_directory(handle, XBT_NULL, buf, num);
  21.345  
  21.346 @@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str
  21.347  
  21.348      buf = get_device_variable_path(devtype, inst, var);
  21.349      if (NULL == buf)
  21.350 -	goto out;
  21.351 +        goto out;
  21.352  
  21.353      value = xs_read(handle, XBT_NULL, buf, &len);
  21.354  
  21.355      free(buf);
  21.356  
  21.357 -out:
  21.358 + out:
  21.359      return value;
  21.360  }
  21.361  
  21.362 @@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char *
  21.363      char *buf = NULL, *path = NULL, *value = NULL;
  21.364  
  21.365      if (xsh == NULL)
  21.366 -	goto out;
  21.367 +        goto out;
  21.368  
  21.369      path = xs_get_domain_path(xsh, domid);
  21.370      if (path == NULL) {
  21.371 -	fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
  21.372 -	goto out;
  21.373 +        fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
  21.374 +        goto out;
  21.375      }
  21.376  
  21.377      pasprintf(&buf, "%s/vm", path);
  21.378      free(path);
  21.379      path = xs_read(xsh, XBT_NULL, buf, NULL);
  21.380      if (path == NULL) {
  21.381 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.382 -	goto out;
  21.383 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.384 +        goto out;
  21.385      }
  21.386  
  21.387      pasprintf(&buf, "%s/%s", path, key);
  21.388      value = xs_read(xsh, XBT_NULL, buf, len);
  21.389      if (value == NULL) {
  21.390 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.391 -	goto out;
  21.392 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.393 +        goto out;
  21.394      }
  21.395  
  21.396   out:
  21.397 @@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k
  21.398      int rc = -1;
  21.399  
  21.400      if (xsh == NULL)
  21.401 -	goto out;
  21.402 +        goto out;
  21.403  
  21.404      path = xs_get_domain_path(xsh, domid);
  21.405      if (path == NULL) {
  21.406 -	fprintf(logfile, "xs_get_domain_path: error\n");
  21.407 -	goto out;
  21.408 +        fprintf(logfile, "xs_get_domain_path: error\n");
  21.409 +        goto out;
  21.410      }
  21.411  
  21.412      pasprintf(&buf, "%s/vm", path);
  21.413      free(path);
  21.414      path = xs_read(xsh, XBT_NULL, buf, NULL);
  21.415      if (path == NULL) {
  21.416 -	fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.417 -	goto out;
  21.418 +        fprintf(logfile, "xs_read(%s): read error\n", buf);
  21.419 +        goto out;
  21.420      }
  21.421  
  21.422      pasprintf(&buf, "%s/%s", path, key);
  21.423      rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
  21.424      if (rc) {
  21.425 -	fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
  21.426 -	goto out;
  21.427 +        fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
  21.428 +        goto out;
  21.429      }
  21.430  
  21.431   out:
    22.1 --- a/tools/libfsimage/fat/fat.h	Thu Apr 12 16:37:32 2007 -0500
    22.2 +++ b/tools/libfsimage/fat/fat.h	Fri Apr 13 11:14:26 2007 +0100
    22.3 @@ -84,17 +84,17 @@ struct fat_bpb {
    22.4  #define FAT_DIRENTRY_LENGTH       32
    22.5  
    22.6  #define FAT_DIRENTRY_ATTRIB(entry) \
    22.7 -  (*((unsigned char *) (entry+11)))
    22.8 +  (*((__u8 *) (entry+11)))
    22.9  #define FAT_DIRENTRY_VALID(entry) \
   22.10 -  ( ((*((unsigned char *) entry)) != 0) \
   22.11 -    && ((*((unsigned char *) entry)) != 0xE5) \
   22.12 +  ( ((*((__u8 *) entry)) != 0) \
   22.13 +    && ((*((__u8 *) entry)) != 0xE5) \
   22.14      && !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) )
   22.15  #define FAT_DIRENTRY_FIRST_CLUSTER(entry) \
   22.16 -  ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16))
   22.17 +  ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16))
   22.18  #define FAT_DIRENTRY_FILELENGTH(entry) \
   22.19 -  (*((unsigned long *) (entry+28)))
   22.20 +  (*((__u32 *) (entry+28)))
   22.21  
   22.22  #define FAT_LONGDIR_ID(entry) \
   22.23 -  (*((unsigned char *) (entry)))
   22.24 +  (*((__u8 *) (entry)))
   22.25  #define FAT_LONGDIR_ALIASCHECKSUM(entry) \
   22.26 -  (*((unsigned char *) (entry+13)))
   22.27 +  (*((__u8 *) (entry+13)))
    23.1 --- a/tools/libxc/Makefile	Thu Apr 12 16:37:32 2007 -0500
    23.2 +++ b/tools/libxc/Makefile	Fri Apr 13 11:14:26 2007 +0100
    23.3 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
    23.4  
    23.5  GUEST_SRCS-y :=
    23.6  GUEST_SRCS-y += xg_private.c
    23.7 -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
    23.8 -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
    23.9 +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
   23.10 +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
   23.11  
   23.12  # symlink libelf from xen/common/libelf/
   23.13  LIBELF_SRCS := libelf-tools.c libelf-loader.c
    24.1 --- a/tools/libxc/ia64/xc_ia64_linux_save.c	Thu Apr 12 16:37:32 2007 -0500
    24.2 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c	Fri Apr 13 11:14:26 2007 +0100
    24.3 @@ -134,8 +134,10 @@ retry:
    24.4  }
    24.5  
    24.6  int
    24.7 -xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    24.8 -              uint32_t max_factor, uint32_t flags, int (*suspend)(int))
    24.9 +xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   24.10 +               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   24.11 +               int hvm, void *(*init_qemu_maps)(int, unsigned),
   24.12 +               void (*qemu_flip_buffer)(int, int))
   24.13  {
   24.14      DECLARE_DOMCTL;
   24.15      xc_dominfo_t info;
    25.1 --- a/tools/libxc/xc_domain.c	Thu Apr 12 16:37:32 2007 -0500
    25.2 +++ b/tools/libxc/xc_domain.c	Fri Apr 13 11:14:26 2007 +0100
    25.3 @@ -8,6 +8,7 @@
    25.4  
    25.5  #include "xc_private.h"
    25.6  #include <xen/memory.h>
    25.7 +#include <xen/hvm/hvm_op.h>
    25.8  
    25.9  int xc_domain_create(int xc_handle,
   25.10                       uint32_t ssidref,
   25.11 @@ -657,6 +658,44 @@ int xc_domain_send_trigger(int xc_handle
   25.12      return do_domctl(xc_handle, &domctl);
   25.13  }
   25.14  
   25.15 +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value)
   25.16 +{
   25.17 +    DECLARE_HYPERCALL;
   25.18 +    xen_hvm_param_t arg;
   25.19 +    int rc;
   25.20 +
   25.21 +    hypercall.op     = __HYPERVISOR_hvm_op;
   25.22 +    hypercall.arg[0] = HVMOP_set_param;
   25.23 +    hypercall.arg[1] = (unsigned long)&arg;
   25.24 +    arg.domid = dom;
   25.25 +    arg.index = param;
   25.26 +    arg.value = value;
   25.27 +    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   25.28 +        return -1;
   25.29 +    rc = do_xen_hypercall(handle, &hypercall);
   25.30 +    unlock_pages(&arg, sizeof(arg));
   25.31 +    return rc;
   25.32 +}
   25.33 +
   25.34 +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value)
   25.35 +{
   25.36 +    DECLARE_HYPERCALL;
   25.37 +    xen_hvm_param_t arg;
   25.38 +    int rc;
   25.39 +
   25.40 +    hypercall.op     = __HYPERVISOR_hvm_op;
   25.41 +    hypercall.arg[0] = HVMOP_get_param;
   25.42 +    hypercall.arg[1] = (unsigned long)&arg;
   25.43 +    arg.domid = dom;
   25.44 +    arg.index = param;
   25.45 +    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   25.46 +        return -1;
   25.47 +    rc = do_xen_hypercall(handle, &hypercall);
   25.48 +    unlock_pages(&arg, sizeof(arg));
   25.49 +    *value = arg.value;
   25.50 +    return rc;
   25.51 +}
   25.52 +
   25.53  /*
   25.54   * Local variables:
   25.55   * mode: C
    26.1 --- a/tools/libxc/xc_domain_restore.c	Thu Apr 12 16:37:32 2007 -0500
    26.2 +++ b/tools/libxc/xc_domain_restore.c	Fri Apr 13 11:14:26 2007 +0100
    26.3 @@ -688,33 +688,22 @@ int xc_domain_restore(int xc_handle, int
    26.4              ERROR("error zeroing magic pages");
    26.5              goto out;
    26.6          }
    26.7 -        
    26.8 -        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
    26.9 -        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
   26.10 -        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
   26.11 -        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
   26.12 -        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
   26.13 -        *store_mfn = magic_pfns[2];
   26.14 -
   26.15 -        /* Read vcpu contexts */
   26.16 -        for ( i = 0; i <= max_vcpu_id; i++ )
   26.17 +                
   26.18 +        if ( (rc = xc_set_hvm_param(xc_handle, dom, 
   26.19 +                                    HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
   26.20 +             || (rc = xc_set_hvm_param(xc_handle, dom, 
   26.21 +                                       HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
   26.22 +             || (rc = xc_set_hvm_param(xc_handle, dom, 
   26.23 +                                       HVM_PARAM_STORE_PFN, magic_pfns[2]))
   26.24 +             || (rc = xc_set_hvm_param(xc_handle, dom, 
   26.25 +                                       HVM_PARAM_PAE_ENABLED, pae))
   26.26 +             || (rc = xc_set_hvm_param(xc_handle, dom, 
   26.27 +                                       HVM_PARAM_STORE_EVTCHN, store_evtchn)) )
   26.28          {
   26.29 -            if ( !(vcpumap & (1ULL << i)) )
   26.30 -                continue;
   26.31 -
   26.32 -            if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
   26.33 -            {
   26.34 -                ERROR("error read vcpu context.\n");
   26.35 -                goto out;
   26.36 -            }
   26.37 -            
   26.38 -            if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
   26.39 -            {
   26.40 -                ERROR("Could not set vcpu context, rc=%d", rc);
   26.41 -                goto out;
   26.42 -            }
   26.43 -            rc = 1;
   26.44 +            ERROR("error setting HVM params: %i", rc);
   26.45 +            goto out;
   26.46          }
   26.47 +        *store_mfn = magic_pfns[2];
   26.48  
   26.49          /* Read HVM context */
   26.50          if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/libxc/xc_domain_save.c	Fri Apr 13 11:14:26 2007 +0100
    27.3 @@ -0,0 +1,1587 @@
    27.4 +/******************************************************************************
    27.5 + * xc_linux_save.c
    27.6 + *
    27.7 + * Save the state of a running Linux session.
    27.8 + *
    27.9 + * Copyright (c) 2003, K A Fraser.
   27.10 + */
   27.11 +
   27.12 +#include <inttypes.h>
   27.13 +#include <time.h>
   27.14 +#include <stdlib.h>
   27.15 +#include <unistd.h>
   27.16 +#include <sys/time.h>
   27.17 +
   27.18 +#include "xc_private.h"
   27.19 +#include "xc_dom.h"
   27.20 +#include "xg_private.h"
   27.21 +#include "xg_save_restore.h"
   27.22 +
   27.23 +#include <xen/hvm/params.h>
   27.24 +#include <xen/hvm/e820.h>
   27.25 +
   27.26 +/*
   27.27 +** Default values for important tuning parameters. Can override by passing
   27.28 +** non-zero replacement values to xc_domain_save().
   27.29 +**
   27.30 +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   27.31 +**
   27.32 +*/
   27.33 +#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   27.34 +#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
   27.35 +
   27.36 +/* max mfn of the whole machine */
   27.37 +static unsigned long max_mfn;
   27.38 +
   27.39 +/* virtual starting address of the hypervisor */
   27.40 +static unsigned long hvirt_start;
   27.41 +
   27.42 +/* #levels of page tables used by the current guest */
   27.43 +static unsigned int pt_levels;
   27.44 +
   27.45 +/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
   27.46 +static unsigned long *qemu_bitmaps[2];
   27.47 +static int qemu_active;
   27.48 +static int qemu_non_active;
   27.49 +
   27.50 +/* number of pfns this guest has (i.e. number of entries in the P2M) */
   27.51 +static unsigned long p2m_size;
   27.52 +
   27.53 +/* Live mapping of the table mapping each PFN to its current MFN. */
   27.54 +static xen_pfn_t *live_p2m = NULL;
   27.55 +
   27.56 +/* Live mapping of system MFN to PFN table. */
   27.57 +static xen_pfn_t *live_m2p = NULL;
   27.58 +static unsigned long m2p_mfn0;
   27.59 +
   27.60 +/* grep fodder: machine_to_phys */
   27.61 +
   27.62 +#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
   27.63 +
   27.64 +/*
   27.65 + * Returns TRUE if the given machine frame number has a unique mapping
   27.66 + * in the guest's pseudophysical map.
   27.67 + */
   27.68 +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
   27.69 +    (((_mfn) < (max_mfn)) &&                    \
   27.70 +     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
   27.71 +      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
   27.72 +
   27.73 +/* Returns TRUE if MFN is successfully converted to a PFN. */
   27.74 +#define translate_mfn_to_pfn(_pmfn)                             \
   27.75 +({                                                              \
   27.76 +    unsigned long mfn = *(_pmfn);                               \
   27.77 +    int _res = 1;                                               \
   27.78 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
   27.79 +        _res = 0;                                               \
   27.80 +    else                                                        \
   27.81 +        *(_pmfn) = mfn_to_pfn(mfn);                             \
   27.82 +    _res;                                                       \
   27.83 +})
   27.84 +
   27.85 +/*
   27.86 +** During (live) save/migrate, we maintain a number of bitmaps to track
   27.87 +** which pages we have to send, to fixup, and to skip.
   27.88 +*/
   27.89 +
   27.90 +#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   27.91 +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
   27.92 +#define BITMAP_SIZE   (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long))
   27.93 +
   27.94 +#define BITMAP_ENTRY(_nr,_bmap) \
   27.95 +   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   27.96 +
   27.97 +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   27.98 +
   27.99 +static inline int test_bit (int nr, volatile void * addr)
  27.100 +{
  27.101 +    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
  27.102 +}
  27.103 +
  27.104 +static inline void clear_bit (int nr, volatile void * addr)
  27.105 +{
  27.106 +    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
  27.107 +}
  27.108 +
  27.109 +static inline void set_bit ( int nr, volatile void * addr)
  27.110 +{
  27.111 +    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
  27.112 +}
  27.113 +
  27.114 +/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
  27.115 +static inline unsigned int hweight32(unsigned int w)
  27.116 +{
  27.117 +    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
  27.118 +    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
  27.119 +    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
  27.120 +    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
  27.121 +    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
  27.122 +}
  27.123 +
  27.124 +static inline int count_bits ( int nr, volatile void *addr)
  27.125 +{
  27.126 +    int i, count = 0;
  27.127 +    volatile unsigned long *p = (volatile unsigned long *)addr;
  27.128 +    /* We know that the array is padded to unsigned long. */
  27.129 +    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
  27.130 +        count += hweight32(*p);
  27.131 +    return count;
  27.132 +}
  27.133 +
  27.134 +static inline int permute( int i, int nr, int order_nr  )
  27.135 +{
  27.136 +    /* Need a simple permutation function so that we scan pages in a
  27.137 +       pseudo random order, enabling us to get a better estimate of
  27.138 +       the domain's page dirtying rate as we go (there are often
  27.139 +       contiguous ranges of pfns that have similar behaviour, and we
  27.140 +       want to mix them up. */
  27.141 +
  27.142 +    /* e.g. nr->oder 15->4 16->4 17->5 */
  27.143 +    /* 512MB domain, 128k pages, order 17 */
  27.144 +
  27.145 +    /*
  27.146 +      QPONMLKJIHGFEDCBA
  27.147 +             QPONMLKJIH
  27.148 +      GFEDCBA
  27.149 +     */
  27.150 +
  27.151 +    /*
  27.152 +      QPONMLKJIHGFEDCBA
  27.153 +                  EDCBA
  27.154 +             QPONM
  27.155 +      LKJIHGF
  27.156 +      */
  27.157 +
  27.158 +    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  27.159 +    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  27.160 +
  27.161 +    return i;
  27.162 +}
  27.163 +
  27.164 +static uint64_t tv_to_us(struct timeval *new)
  27.165 +{
  27.166 +    return (new->tv_sec * 1000000) + new->tv_usec;
  27.167 +}
  27.168 +
  27.169 +static uint64_t llgettimeofday(void)
  27.170 +{
  27.171 +    struct timeval now;
  27.172 +    gettimeofday(&now, NULL);
  27.173 +    return tv_to_us(&now);
  27.174 +}
  27.175 +
  27.176 +static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  27.177 +{
  27.178 +    return (((new->tv_sec - old->tv_sec)*1000000) +
  27.179 +            (new->tv_usec - old->tv_usec));
  27.180 +}
  27.181 +
  27.182 +static int noncached_write(int fd, int live, void *buffer, int len) 
  27.183 +{
  27.184 +    static int write_count = 0;
  27.185 +
  27.186 +    int rc = write(fd,buffer,len);
  27.187 +
  27.188 +    write_count += len;
  27.189 +    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
  27.190 +    {
  27.191 +        /* Time to discard cache - dont care if this fails */
  27.192 +        discard_file_cache(fd, 0 /* no flush */);
  27.193 +        write_count = 0;
  27.194 +    }
  27.195 +
  27.196 +    return rc;
  27.197 +}
  27.198 +
  27.199 +#ifdef ADAPTIVE_SAVE
  27.200 +
  27.201 +/*
  27.202 +** We control the rate at which we transmit (or save) to minimize impact
  27.203 +** on running domains (including the target if we're doing live migrate).
  27.204 +*/
  27.205 +
  27.206 +#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
  27.207 +#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
  27.208 +
  27.209 +/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
  27.210 +#define RATE_TO_BTU      781250
  27.211 +
  27.212 +/* Amount in bytes we allow ourselves to send in a burst */
  27.213 +#define BURST_BUDGET (100*1024)
  27.214 +
  27.215 +/* We keep track of the current and previous transmission rate */
  27.216 +static int mbit_rate, ombit_rate = 0;
  27.217 +
  27.218 +/* Have we reached the maximum transmission rate? */
  27.219 +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
  27.220 +
  27.221 +static inline void initialize_mbit_rate()
  27.222 +{
  27.223 +    mbit_rate = START_MBIT_RATE;
  27.224 +}
  27.225 +
  27.226 +static int ratewrite(int io_fd, int live, void *buf, int n)
  27.227 +{
  27.228 +    static int budget = 0;
  27.229 +    static int burst_time_us = -1;
  27.230 +    static struct timeval last_put = { 0 };
  27.231 +    struct timeval now;
  27.232 +    struct timespec delay;
  27.233 +    long long delta;
  27.234 +
  27.235 +    if ( START_MBIT_RATE == 0 )
  27.236 +        return noncached_write(io_fd, live, buf, n);
  27.237 +
  27.238 +    budget -= n;
  27.239 +    if ( budget < 0 )
  27.240 +    {
  27.241 +        if ( mbit_rate != ombit_rate )
  27.242 +        {
  27.243 +            burst_time_us = RATE_TO_BTU / mbit_rate;
  27.244 +            ombit_rate = mbit_rate;
  27.245 +            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
  27.246 +                    mbit_rate, BURST_BUDGET, burst_time_us);
  27.247 +        }
  27.248 +        if ( last_put.tv_sec == 0 )
  27.249 +        {
  27.250 +            budget += BURST_BUDGET;
  27.251 +            gettimeofday(&last_put, NULL);
  27.252 +        }
  27.253 +        else
  27.254 +        {
  27.255 +            while ( budget < 0 )
  27.256 +            {
  27.257 +                gettimeofday(&now, NULL);
  27.258 +                delta = tv_delta(&now, &last_put);
  27.259 +                while ( delta > burst_time_us )
  27.260 +                {
  27.261 +                    budget += BURST_BUDGET;
  27.262 +                    last_put.tv_usec += burst_time_us;
  27.263 +                    if ( last_put.tv_usec > 1000000 )
  27.264 +                    {
  27.265 +                        last_put.tv_usec -= 1000000;
  27.266 +                        last_put.tv_sec++;
  27.267 +                    }
  27.268 +                    delta -= burst_time_us;
  27.269 +                }
  27.270 +                if ( budget > 0 )
  27.271 +                    break;
  27.272 +                delay.tv_sec = 0;
  27.273 +                delay.tv_nsec = 1000 * (burst_time_us - delta);
  27.274 +                while ( delay.tv_nsec > 0 )
  27.275 +                    if ( nanosleep(&delay, &delay) == 0 )
  27.276 +                        break;
  27.277 +            }
  27.278 +        }
  27.279 +    }
  27.280 +    return noncached_write(io_fd, live, buf, n);
  27.281 +}
  27.282 +
  27.283 +#else /* ! ADAPTIVE SAVE */
  27.284 +
  27.285 +#define RATE_IS_MAX() (0)
  27.286 +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
  27.287 +#define initialize_mbit_rate()
  27.288 +
  27.289 +#endif
  27.290 +
  27.291 +static inline ssize_t write_exact(int fd, void *buf, size_t count)
  27.292 +{
  27.293 +    return (write(fd, buf, count) == count);
  27.294 +}
  27.295 +/* Log CPU/bandwidth deltas since the previous call; always returns 0. */
  27.296 +static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  27.297 +                       xc_shadow_op_stats_t *stats, int print)
  27.298 +{
  27.299 +    static struct timeval wall_last;
  27.300 +    static long long      d0_cpu_last;
  27.301 +    static long long      d1_cpu_last;
  27.302 +    /* 'print' gates only the log line; the statics are refreshed every call. */
  27.303 +    struct timeval        wall_now;
  27.304 +    long long             wall_delta;
  27.305 +    long long             d0_cpu_now, d0_cpu_delta;
  27.306 +    long long             d1_cpu_now, d1_cpu_delta;
  27.307 +
  27.308 +    gettimeofday(&wall_now, NULL);
  27.309 +    /* Keep the raw readings: scaling first turned -1 into 0 and hid failures. */
  27.310 +    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0);
  27.311 +    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0);
  27.312 +
  27.313 +    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  27.314 +        DPRINTF("ARRHHH!!\n");
  27.315 +
  27.316 +    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  27.317 +    if ( wall_delta == 0 )
  27.318 +        wall_delta = 1;
  27.319 +    /* Apply the whole scaling (previously /1000 twice) to the raw difference. */
  27.320 +    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/(1000*1000);
  27.321 +    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/(1000*1000);
  27.322 +
  27.323 +    if ( print )
  27.324 +        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  27.325 +                "dirtied %dMb/s %" PRId32 " pages\n",
  27.326 +                wall_delta,
  27.327 +                (int)((d0_cpu_delta*100)/wall_delta),
  27.328 +                (int)((d1_cpu_delta*100)/wall_delta),
  27.329 +                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  27.330 +                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  27.331 +                stats->dirty_count);
  27.332 +
  27.333 +#ifdef ADAPTIVE_SAVE
  27.334 +    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
  27.335 +    {
  27.336 +        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
  27.337 +            + 50;
  27.338 +        if ( mbit_rate > MAX_MBIT_RATE )
  27.339 +            mbit_rate = MAX_MBIT_RATE;
  27.340 +    }
  27.341 +#endif
  27.342 +
  27.343 +    d0_cpu_last = d0_cpu_now;
  27.344 +    d1_cpu_last = d1_cpu_now;
  27.345 +    wall_last   = wall_now;
  27.346 +
  27.347 +    return 0;
  27.348 +}
  27.349 +/* Diagnostic loop: per run, issue a SHADOW_OP_CLEAN on 'arr', then 40 times */
  27.350 +/* sleep 50ms and log the PEEKed fault/dirty counters. Always returns -1.     */
  27.351 +static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
  27.352 +                          unsigned long *arr, int runs)
  27.353 +{
  27.354 +    long long start, now;
  27.355 +    xc_shadow_op_stats_t stats;
  27.356 +    int j;
  27.357 +
  27.358 +    start = llgettimeofday();
  27.359 +
  27.360 +    for ( j = 0; j < runs; j++ ) /* runs <= 0 skips the whole body */
  27.361 +    {
  27.362 +        int i;
  27.363 +        /* Neither shadow-op return value below is checked. */
  27.364 +        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  27.365 +                          arr, p2m_size, NULL, 0, NULL);
  27.366 +        DPRINTF("#Flush\n");
  27.367 +        for ( i = 0; i < 40; i++ )
  27.368 +        {
  27.369 +            usleep(50000);
  27.370 +            now = llgettimeofday();
  27.371 +            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  27.372 +                              NULL, 0, NULL, 0, &stats);
  27.373 +            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  27.374 +                    ((now-start)+500)/1000, /* elapsed since 'start', rounded */
  27.375 +                    stats.fault_count, stats.dirty_count);
  27.376 +        }
  27.377 +    }
  27.378 +
  27.379 +    return -1;
  27.380 +}
  27.381 +/* Invoke 'suspend' for 'dom', then poll the domain's state until it shows a */
  27.382 +/* suspend shutdown. Returns 0 on success, -1 on failure. 'io_fd' is unused.  */
  27.383 +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  27.384 +                             int dom, xc_dominfo_t *info)
  27.385 +{
  27.386 +    int i = 0;
  27.387 +
  27.388 +    if ( !(*suspend)(dom) ) /* the callback signals failure by returning 0 */
  27.389 +    {
  27.390 +        ERROR("Suspend request failed");
  27.391 +        return -1;
  27.392 +    }
  27.393 +
  27.394 + retry:
  27.395 +    /* Re-read the domain state on every pass; '*info' keeps the last reading. */
  27.396 +    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  27.397 +    {
  27.398 +        ERROR("Could not get domain info");
  27.399 +        return -1;
  27.400 +    }
  27.401 +
  27.402 +    if ( info->dying )
  27.403 +    {
  27.404 +        ERROR("domain is dying");
  27.405 +        return -1;
  27.406 +    }
  27.407 +
  27.408 +    if ( info->crashed )
  27.409 +    {
  27.410 +        ERROR("domain has crashed");
  27.411 +        return -1;
  27.412 +    }
  27.413 +
  27.414 +    if ( info->shutdown )
  27.415 +    {
  27.416 +        switch ( info->shutdown_reason )
  27.417 +        {
  27.418 +        case SHUTDOWN_poweroff:
  27.419 +        case SHUTDOWN_reboot:
  27.420 +            ERROR("domain has shut down");
  27.421 +            return -1;
  27.422 +        case SHUTDOWN_suspend:
  27.423 +            return 0; /* the only successful exit from this function */
  27.424 +        case SHUTDOWN_crash:
  27.425 +            ERROR("domain has crashed");
  27.426 +            return -1;
  27.427 +        } /* no default: any other reason falls through to the checks below */
  27.428 +    }
  27.429 +
  27.430 +    if ( info->paused )
  27.431 +    {
  27.432 +        /* Try unpausing domain, wait, and retest. */
  27.433 +        xc_domain_unpause( xc_handle, dom );
  27.434 +        ERROR("Domain was paused. Wait and re-test.");
  27.435 +        usleep(10000); /* 10ms */
  27.436 +        goto retry; /* NOTE(review): not counted in 'i', so this path is unbounded */
  27.437 +    }
  27.438 +
  27.439 +    if ( ++i < 100 ) /* at most 99 re-polls, 10ms apart */
  27.440 +    {
  27.441 +        ERROR("Retry suspend domain");
  27.442 +        usleep(10000); /* 10ms */
  27.443 +        goto retry;
  27.444 +    }
  27.445 +
  27.446 +    ERROR("Unable to suspend domain.");
  27.447 +
  27.448 +    return -1;
  27.449 +}
  27.450 +
  27.451 +/*
  27.452 +** Map (read-only) the guest's top-level page of MFNs. The guest may still be
  27.453 +** resuming from a previous restore, so poll up to 100 times, 10ms apart, for
  27.454 +** a non-zero frame-list-list MFN. Returns the mapping, or NULL on failure.
  27.455 +*/
  27.456 +static void *map_frame_list_list(int xc_handle, uint32_t dom,
  27.457 +                                 shared_info_t *shinfo)
  27.458 +{
  27.459 +    int count = 100;
  27.460 +    void *p;
  27.461 +
  27.462 +    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
  27.463 +        usleep(10000);
  27.464 +
  27.465 +    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) /* still unset: give up */
  27.466 +    {
  27.467 +        ERROR("Timed out waiting for frame list updated.");
  27.468 +        return NULL;
  27.469 +    }
  27.470 +
  27.471 +    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
  27.472 +                             shinfo->arch.pfn_to_mfn_frame_list_list);
  27.473 +    if ( p == NULL )
  27.474 +        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
  27.475 +
  27.476 +    return p; /* NULL here means the mapping itself failed */
  27.477 +}
  27.478 +
  27.479 +/*
  27.480 +** During transfer (or in the state file), all page-table pages must be
  27.481 +** converted into a 'canonical' form where references to actual mfns
  27.482 +** are replaced with references to the corresponding pfns.
  27.483 +**
  27.484 +** Reads the page-table page 'spage' of the given 'type' and writes its
  27.485 +** canonical form to 'dpage'; entries in the hypervisor-reserved range are
  27.486 +** zeroed. Returns 1 if a stale mfn was seen (that entry gets pfn 0), else 0.
  27.487 +*/
  27.488 +static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
  27.489 +                           const void *spage, void *dpage)
  27.490 +{
  27.491 +    /* NOTE: the 'pfn' parameter is unused; the loop declares its own 'pfn'. */
  27.492 +    int i, pte_last, xen_start, xen_end, race = 0; 
  27.493 +    uint64_t pte;
  27.494 +
  27.495 +    /*
  27.496 +    ** Determine which entries in this page table hold reserved hypervisor
  27.497 +    ** mappings; this depends on the page table type and the number of paging
  27.498 +    ** levels. Start with an empty range: start == end == entries per page.
  27.499 +    */
  27.500 +    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
  27.501 +
  27.502 +    if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  27.503 +        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
  27.504 +
  27.505 +    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
  27.506 +        xen_start = L3_PAGETABLE_ENTRIES_PAE;
  27.507 +
  27.508 +    /*
  27.509 +    ** In PAE, only the L2 mapping the top 1GB contains Xen mappings.
  27.510 +    ** We can spot this by looking for the guest linear mapping which
  27.511 +    ** Xen always ensures is present in that L2. Guests must ensure
  27.512 +    ** that this check will fail for other L2s.
  27.513 +    */
  27.514 +    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  27.515 +    {
  27.516 +        int hstart;
  27.517 +        uint64_t he;
  27.518 +
  27.519 +        hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  27.520 +        he = ((const uint64_t *) spage)[hstart];
  27.521 +
  27.522 +        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  27.523 +        {
  27.524 +            /* hvirt starts with xen stuff... */
  27.525 +            xen_start = hstart;
  27.526 +        }
  27.527 +        else if ( hvirt_start != 0xf5800000 )
  27.528 +        {
  27.529 +            /* old L2s from before hole was shrunk... */
  27.530 +            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  27.531 +            he = ((const uint64_t *) spage)[hstart];
  27.532 +            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  27.533 +                xen_start = hstart;
  27.534 +        }
  27.535 +    }
  27.536 +
  27.537 +    if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
  27.538 +    {
  27.539 +        /*
  27.540 +        ** XXX SMH: should compute these from hvirt_start (which we have)
  27.541 +        ** and hvirt_end (which we don't)
  27.542 +        */
  27.543 +        xen_start = 256;
  27.544 +        xen_end   = 272;
  27.545 +    }
  27.546 +
  27.547 +    /* Now iterate through the page table, canonicalizing each PTE */
  27.548 +    for (i = 0; i < pte_last; i++ )
  27.549 +    {
  27.550 +        unsigned long pfn, mfn; /* this 'pfn' shadows the function parameter */
  27.551 +
  27.552 +        if ( pt_levels == 2 )
  27.553 +            pte = ((const uint32_t*)spage)[i];
  27.554 +        else
  27.555 +            pte = ((const uint64_t*)spage)[i];
  27.556 +
  27.557 +        if ( (i >= xen_start) && (i < xen_end) )
  27.558 +            pte = 0; /* hypervisor slot: emitted as a zero entry */
  27.559 +
  27.560 +        if ( pte & _PAGE_PRESENT )
  27.561 +        {
  27.562 +            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
  27.563 +            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
  27.564 +            {
  27.565 +                /* This will happen if the type info is stale which
  27.566 +                   is quite feasible under live migration */
  27.567 +                pfn  = 0;  /* zap it - we'll retransmit this page later */
  27.568 +                race = 1;  /* inform the caller of race; fatal if !live */ 
  27.569 +            }
  27.570 +            else
  27.571 +                pfn = mfn_to_pfn(mfn);
  27.572 +
  27.573 +            pte &= ~MADDR_MASK_X86;
  27.574 +            pte |= (uint64_t)pfn << PAGE_SHIFT;
  27.575 +
  27.576 +            /*
  27.577 +             * PAE guest L3Es can contain these flags when running on
  27.578 +             * a 64bit hypervisor. We zap these here to avoid any
  27.579 +             * surprise at restore time...
  27.580 +             */
  27.581 +            if ( (pt_levels == 3) &&
  27.582 +                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
  27.583 +                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
  27.584 +                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
  27.585 +        }
  27.586 +
  27.587 +        if ( pt_levels == 2 ) /* output entry width matches the input width */
  27.588 +            ((uint32_t*)dpage)[i] = pte;
  27.589 +        else
  27.590 +            ((uint64_t*)dpage)[i] = pte;
  27.591 +    }
  27.592 +
  27.593 +    return race;
  27.594 +}
  27.595 +/* Map the M2P table for [0, max_mfn]; returns it, or NULL (nothing leaked). */
  27.596 +static xen_pfn_t *xc_map_m2p(int xc_handle,
  27.597 +                                 unsigned long max_mfn,
  27.598 +                                 int prot)
  27.599 +{
  27.600 +    struct xen_machphys_mfn_list xmml;
  27.601 +    privcmd_mmap_entry_t *entries = NULL;
  27.602 +    unsigned long m2p_chunks, m2p_size;
  27.603 +    xen_pfn_t *m2p = MAP_FAILED; /* MAP_FAILED == nothing mapped (yet) */
  27.604 +    xen_pfn_t *extent_start, *ret = NULL;
  27.605 +    int i, rc;
  27.606 +
  27.607 +    m2p_size   = M2P_SIZE(max_mfn);
  27.608 +    m2p_chunks = M2P_CHUNKS(max_mfn);
  27.609 +    xmml.max_extents = m2p_chunks;
  27.610 +    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
  27.611 +    {
  27.612 +        ERROR("failed to allocate space for m2p mfns");
  27.613 +        goto out;
  27.614 +    }
  27.615 +    set_xen_guest_handle(xmml.extent_start, extent_start);
  27.616 +    /* Fetch one MFN per M2P chunk into extent_start[]. */
  27.617 +    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
  27.618 +         (xmml.nr_extents != m2p_chunks) )
  27.619 +    {
  27.620 +        ERROR("xc_get_m2p_mfns");
  27.621 +        goto out;
  27.622 +    }
  27.623 +    /* Obtain the virtual range that the chunks are mapped into below. */
  27.624 +    if ( (m2p = mmap(NULL, m2p_size, prot,
  27.625 +                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
  27.626 +    {
  27.627 +        ERROR("failed to mmap m2p");
  27.628 +        goto out;
  27.629 +    }
  27.630 +
  27.631 +    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
  27.632 +    {
  27.633 +        ERROR("failed to allocate space for mmap entries");
  27.634 +        goto out;
  27.635 +    }
  27.636 +
  27.637 +    for ( i = 0; i < m2p_chunks; i++ )
  27.638 +    {
  27.639 +        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
  27.640 +        entries[i].mfn = extent_start[i];
  27.641 +        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
  27.642 +    }
  27.643 +
  27.644 +    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
  27.645 +                                     entries, m2p_chunks)) < 0 )
  27.646 +    {
  27.647 +        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
  27.648 +        goto out;
  27.649 +    }
  27.650 +    m2p_mfn0 = entries[0].mfn; /* file-level global, set only on success */
  27.651 +    ret = m2p;
  27.652 + out:
  27.653 +    if ( (ret == NULL) && (m2p != MAP_FAILED) )
  27.654 +        munmap(m2p, m2p_size); /* failure after mmap: drop the range again */
  27.655 +    free(extent_start); /* scratch arrays go on every path; free(NULL) is ok */
  27.656 +    free(entries);
  27.657 +    return ret;
  27.658 +}
  27.659 +/* Map the guest's live P2M table and write its canonicalised frame list to */
  27.660 +/* io_fd (after an optional extended-info record). Returns the map or NULL. */
  27.661 +static xen_pfn_t *map_and_save_p2m_table(int xc_handle, 
  27.662 +                                         int io_fd, 
  27.663 +                                         uint32_t dom,
  27.664 +                                         unsigned long p2m_size,
  27.665 +                                         shared_info_t *live_shinfo)
  27.666 +{
  27.667 +    vcpu_guest_context_t ctxt;
  27.668 +
  27.669 +    /* Double and single indirect references to the live P2M table */
  27.670 +    xen_pfn_t *live_p2m_frame_list_list = NULL;
  27.671 +    xen_pfn_t *live_p2m_frame_list = NULL;
  27.672 +
  27.673 +    /* A copy of the pfn-to-mfn table frame list. */
  27.674 +    xen_pfn_t *p2m_frame_list = NULL;
  27.675 +
  27.676 +    /* The mapping of the live p2m table itself */
  27.677 +    xen_pfn_t *p2m = NULL;
  27.678 +
  27.679 +    int i, success = 0;
  27.680 +
  27.681 +    live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
  27.682 +                                                   live_shinfo);
  27.683 +    if ( !live_p2m_frame_list_list )
  27.684 +        goto out;
  27.685 +
  27.686 +    live_p2m_frame_list =
  27.687 +        xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  27.688 +                             live_p2m_frame_list_list,
  27.689 +                             P2M_FLL_ENTRIES);
  27.690 +    if ( !live_p2m_frame_list )
  27.691 +    {
  27.692 +        ERROR("Couldn't map p2m_frame_list");
  27.693 +        goto out;
  27.694 +    }
  27.695 +
  27.696 +
  27.697 +    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
  27.698 +       the guest must not change which frames are used for this purpose.
  27.699 +       (It's not clear why it would want to change them, and we'll be OK
  27.700 +       from a safety POV anyhow.) */
  27.701 +
  27.702 +    p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  27.703 +                               live_p2m_frame_list,
  27.704 +                               P2M_FL_ENTRIES);
  27.705 +    if ( !p2m )
  27.706 +    {
  27.707 +        ERROR("Couldn't map p2m table");
  27.708 +        goto out;
  27.709 +    }
  27.710 +    live_p2m = p2m; /* So that translation macros will work */
  27.711 +    
  27.712 +    /* Get a local copy of the live_P2M_frame_list */
  27.713 +    if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
  27.714 +    {
  27.715 +        ERROR("Couldn't allocate p2m_frame_list array");
  27.716 +        goto out;
  27.717 +    }
  27.718 +    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
  27.719 +
  27.720 +    /* Canonicalise the pfn-to-mfn table frame-number list. */
  27.721 +    for ( i = 0; i < p2m_size; i += fpp )
  27.722 +    {
  27.723 +        if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
  27.724 +        {
  27.725 +            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
  27.726 +            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
  27.727 +                  (uint64_t)p2m_frame_list[i/fpp]);
  27.728 +            goto out;
  27.729 +        }
  27.730 +    }
  27.731 +    /* Only VCPU 0's context is fetched; it feeds the extended-info record. */
  27.732 +    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  27.733 +    {
  27.734 +        ERROR("Could not get vcpu context");
  27.735 +        goto out;
  27.736 +    }
  27.737 +
  27.738 +    /*
  27.739 +     * Write an extended-info structure to inform the restore code that
  27.740 +     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
  27.741 +     * slow paths in the restore code.
  27.742 +     */
  27.743 +    if ( (pt_levels == 3) &&
  27.744 +         (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
  27.745 +    {
  27.746 +        unsigned long signature = ~0UL;
  27.747 +        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
  27.748 +        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
  27.749 +        char chunk_sig[]  = "vcpu";
  27.750 +        if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
  27.751 +             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
  27.752 +             !write_exact(io_fd, &chunk_sig, 4) ||
  27.753 +             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
  27.754 +             !write_exact(io_fd, &ctxt,      sizeof(ctxt)) )
  27.755 +        {
  27.756 +            ERROR("write: extended info");
  27.757 +            goto out;
  27.758 +        }
  27.759 +    }
  27.760 +
  27.761 +    if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
  27.762 +    {
  27.763 +        ERROR("write: p2m_frame_list");
  27.764 +        goto out;
  27.765 +    }    
  27.766 +
  27.767 +    success = 1;
  27.768 +
  27.769 + out:
  27.770 +    /* The frame-list mappings and the local copy are released on every path. */
  27.771 +    if ( !success && p2m ) /* the p2m mapping itself is kept only on success */
  27.772 +        munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
  27.773 +
  27.774 +    if ( live_p2m_frame_list_list )
  27.775 +        munmap(live_p2m_frame_list_list, PAGE_SIZE);
  27.776 +
  27.777 +    if ( live_p2m_frame_list )
  27.778 +        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
  27.779 +
  27.780 +    if ( p2m_frame_list ) 
  27.781 +        free(p2m_frame_list);
  27.782 +
  27.783 +    return success ? p2m : NULL;
  27.784 +}
  27.785 +
  27.786 +
  27.787 +
  27.788 +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  27.789 +                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
  27.790 +                   int hvm, void *(*init_qemu_maps)(int, unsigned), 
  27.791 +                   void (*qemu_flip_buffer)(int, int))
  27.792 +{
  27.793 +    xc_dominfo_t info;
  27.794 +
  27.795 +    int rc = 1, i, j, last_iter, iter = 0;
  27.796 +    int live  = (flags & XCFLAGS_LIVE);
  27.797 +    int debug = (flags & XCFLAGS_DEBUG);
  27.798 +    int race = 0, sent_last_iter, skip_this_iter;
  27.799 +
  27.800 +    /* The new domain's shared-info frame number. */
  27.801 +    unsigned long shared_info_frame;
  27.802 +
  27.803 +    /* A copy of the CPU context of the guest. */
  27.804 +    vcpu_guest_context_t ctxt;
  27.805 +
  27.806 +    /* A table containing the type of each PFN (/not/ MFN!). */
  27.807 +    unsigned long *pfn_type = NULL;
  27.808 +    unsigned long *pfn_batch = NULL;
  27.809 +
  27.810 +    /* A copy of one frame of guest memory. */
  27.811 +    char page[PAGE_SIZE];
  27.812 +
  27.813 +    /* Live mapping of shared info structure */
  27.814 +    shared_info_t *live_shinfo = NULL;
  27.815 +
  27.816 +    /* base of the region in which domain memory is mapped */
  27.817 +    unsigned char *region_base = NULL;
  27.818 +
  27.819 +    /* power of 2 order of p2m_size */
  27.820 +    int order_nr;
  27.821 +
  27.822 +    /* bitmap of pages:
  27.823 +       - that should be sent this iteration (unless later marked as skip);
  27.824 +       - to skip this iteration because already dirty;
  27.825 +       - to fixup by sending at the end if not already resent; */
  27.826 +    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
  27.827 +
  27.828 +    xc_shadow_op_stats_t stats;
  27.829 +
  27.830 +    unsigned long needed_to_fix = 0;
  27.831 +    unsigned long total_sent    = 0;
  27.832 +
  27.833 +    uint64_t vcpumap = 1ULL;
  27.834 +
  27.835 +    /* HVM: a buffer for holding HVM context */
  27.836 +    uint32_t hvm_buf_size = 0;
  27.837 +    uint8_t *hvm_buf = NULL;
  27.838 +
  27.839 +    /* HVM: magic frames for ioreqs and xenstore comms. */
  27.840 +    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
  27.841 +
  27.842 +    /* If no explicit control parameters given, use defaults */
  27.843 +    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  27.844 +    max_factor = max_factor ? : DEF_MAX_FACTOR;
  27.845 +
  27.846 +    initialize_mbit_rate();
  27.847 +
  27.848 +    if ( !get_platform_info(xc_handle, dom,
  27.849 +                            &max_mfn, &hvirt_start, &pt_levels) )
  27.850 +    {
  27.851 +        ERROR("Unable to get platform info.");
  27.852 +        return 1;
  27.853 +    }
  27.854 +
  27.855 +    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  27.856 +    {
  27.857 +        ERROR("Could not get domain info");
  27.858 +        return 1;
  27.859 +    }
  27.860 +
  27.861 +    shared_info_frame = info.shared_info_frame;
  27.862 +
  27.863 +    /* Map the shared info frame */
  27.864 +    if ( !hvm )
  27.865 +    {
  27.866 +        live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  27.867 +                                           PROT_READ, shared_info_frame);
  27.868 +        if ( !live_shinfo )
  27.869 +        {
  27.870 +            ERROR("Couldn't map live_shinfo");
  27.871 +            goto out;
  27.872 +        }
  27.873 +    }
  27.874 +
  27.875 +    /* Get the size of the P2M table */
  27.876 +    p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
  27.877 +
  27.878 +    /* Domain is still running at this point */
  27.879 +    if ( live )
  27.880 +    {
  27.881 +        /* Live suspend. Enable log-dirty mode. */
  27.882 +        if ( xc_shadow_control(xc_handle, dom,
  27.883 +                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  27.884 +                               NULL, 0, NULL, 0, NULL) < 0 )
  27.885 +        {
  27.886 +            ERROR("Couldn't enable shadow mode");
  27.887 +            goto out;
  27.888 +        }
  27.889 +
  27.890 +        if ( hvm )
  27.891 +        {
  27.892 +            /* Get qemu-dm logging dirty pages too */
  27.893 +            void *seg = init_qemu_maps(dom, BITMAP_SIZE);
  27.894 +            qemu_bitmaps[0] = seg;
  27.895 +            qemu_bitmaps[1] = seg + BITMAP_SIZE;
  27.896 +            qemu_active = 0;
  27.897 +            qemu_non_active = 1;
  27.898 +        }
  27.899 +    }
  27.900 +    else
  27.901 +    {
  27.902 +        /* This is a non-live suspend. Suspend the domain .*/
  27.903 +        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
  27.904 +        {
  27.905 +            ERROR("Domain appears not to have suspended");
  27.906 +            goto out;
  27.907 +        }
  27.908 +    }
  27.909 +
  27.910 +    last_iter = !live;
  27.911 +
  27.912 +    /* pretend we sent all the pages last iteration */
  27.913 +    sent_last_iter = p2m_size;
  27.914 +
  27.915 +    /* calculate the power of 2 order of p2m_size, e.g.
  27.916 +       15->4 16->4 17->5 */
  27.917 +    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  27.918 +        continue;
  27.919 +
  27.920 +    /* Setup to_send / to_fix and to_skip bitmaps */
  27.921 +    to_send = malloc(BITMAP_SIZE);
  27.922 +    to_fix  = calloc(1, BITMAP_SIZE);
  27.923 +    to_skip = malloc(BITMAP_SIZE);
  27.924 +
  27.925 +    if ( !to_send || !to_fix || !to_skip )
  27.926 +    {
  27.927 +        ERROR("Couldn't allocate to_send array");
  27.928 +        goto out;
  27.929 +    }
  27.930 +
  27.931 +    memset(to_send, 0xff, BITMAP_SIZE);
  27.932 +
  27.933 +    if ( lock_pages(to_send, BITMAP_SIZE) )
  27.934 +    {
  27.935 +        ERROR("Unable to lock to_send");
  27.936 +        return 1;
  27.937 +    }
  27.938 +
  27.939 +    /* (to fix is local only) */
  27.940 +    if ( lock_pages(to_skip, BITMAP_SIZE) )
  27.941 +    {
  27.942 +        ERROR("Unable to lock to_skip");
  27.943 +        return 1;
  27.944 +    }
  27.945 +
  27.946 +    if ( hvm ) 
  27.947 +    {
  27.948 +        /* Need another buffer for HVM context */
  27.949 +        hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
  27.950 +        if ( hvm_buf_size == -1 )
  27.951 +        {
  27.952 +            ERROR("Couldn't get HVM context size from Xen");
  27.953 +            goto out;
  27.954 +        }
  27.955 +        hvm_buf = malloc(hvm_buf_size);
  27.956 +        if ( !hvm_buf )
  27.957 +        {
  27.958 +            ERROR("Couldn't allocate memory");
  27.959 +            goto out;
  27.960 +        }
  27.961 +    }
  27.962 +
  27.963 +    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
  27.964 +
  27.965 +    /* We want zeroed memory so use calloc rather than malloc. */
  27.966 +    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
  27.967 +    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  27.968 +    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
  27.969 +    {
  27.970 +        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
  27.971 +        errno = ENOMEM;
  27.972 +        goto out;
  27.973 +    }
  27.974 +
  27.975 +    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
  27.976 +    {
  27.977 +        ERROR("Unable to lock");
  27.978 +        goto out;
  27.979 +    }
  27.980 +
  27.981 +    /* Setup the mfn_to_pfn table mapping */
  27.982 +    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
  27.983 +    {
  27.984 +        ERROR("Failed to map live M2P table");
  27.985 +        goto out;
  27.986 +    }
  27.987 +
  27.988 +    /* Start writing out the saved-domain record. */
  27.989 +    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
  27.990 +    {
  27.991 +        ERROR("write: p2m_size");
  27.992 +        goto out;
  27.993 +    }
  27.994 +
  27.995 +    if ( !hvm )
  27.996 +    {
  27.997 +        int err = 0;
  27.998 +        unsigned long mfn;
  27.999 +
 27.1000 +        /* Map the P2M table, and write the list of P2M frames */
 27.1001 +        live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom, 
 27.1002 +                                          p2m_size, live_shinfo);
 27.1003 +        if ( live_p2m == NULL )
 27.1004 +        {
 27.1005 +            ERROR("Failed to map/save the p2m frame list");
 27.1006 +            goto out;
 27.1007 +        }
 27.1008 +
 27.1009 +        /*
 27.1010 +         * Quick belt and braces sanity check.
 27.1011 +         */
 27.1012 +        
 27.1013 +        for ( i = 0; i < p2m_size; i++ )
 27.1014 +        {
 27.1015 +            mfn = live_p2m[i];
 27.1016 +            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
 27.1017 +            {
 27.1018 +                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
 27.1019 +                        mfn, mfn_to_pfn(mfn));
 27.1020 +                err++;
 27.1021 +            }
 27.1022 +        }
 27.1023 +        DPRINTF("Had %d unexplained entries in p2m table\n", err);
 27.1024 +    }
 27.1025 +
 27.1026 +    print_stats(xc_handle, dom, 0, &stats, 0);
 27.1027 +
 27.1028 +    /* Now write out each data page, canonicalising page tables as we go... */
 27.1029 +    for ( ; ; )
 27.1030 +    {
 27.1031 +        unsigned int prev_pc, sent_this_iter, N, batch;
 27.1032 +
 27.1033 +        iter++;
 27.1034 +        sent_this_iter = 0;
 27.1035 +        skip_this_iter = 0;
 27.1036 +        prev_pc = 0;
 27.1037 +        N = 0;
 27.1038 +
 27.1039 +        DPRINTF("Saving memory pages: iter %d   0%%", iter);
 27.1040 +
 27.1041 +        while ( N < p2m_size )
 27.1042 +        {
 27.1043 +            unsigned int this_pc = (N * 100) / p2m_size;
 27.1044 +            int rc;
 27.1045 +
 27.1046 +            if ( (this_pc - prev_pc) >= 5 )
 27.1047 +            {
 27.1048 +                DPRINTF("\b\b\b\b%3d%%", this_pc);
 27.1049 +                prev_pc = this_pc;
 27.1050 +            }
 27.1051 +
 27.1052 +            if ( !last_iter )
 27.1053 +            {
 27.1054 +                /* Slightly wasteful to peek the whole array evey time,
 27.1055 +                   but this is fast enough for the moment. */
 27.1056 +                rc = xc_shadow_control(
 27.1057 +                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
 27.1058 +                    p2m_size, NULL, 0, NULL);
 27.1059 +                if ( rc != p2m_size )
 27.1060 +                {
 27.1061 +                    ERROR("Error peeking shadow bitmap");
 27.1062 +                    goto out;
 27.1063 +                }
 27.1064 +            }
 27.1065 +
 27.1066 +            /* load pfn_type[] with the mfn of all the pages we're doing in
 27.1067 +               this batch. */
 27.1068 +            for  ( batch = 0;
 27.1069 +                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
 27.1070 +                   N++ )
 27.1071 +            {
 27.1072 +                int n = permute(N, p2m_size, order_nr);
 27.1073 +
 27.1074 +                if ( debug )
 27.1075 +                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
 27.1076 +                            iter, (unsigned long)n, hvm ? 0 : live_p2m[n],
 27.1077 +                            test_bit(n, to_send),
 27.1078 +                            hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF));
 27.1079 +
 27.1080 +                if ( !last_iter &&
 27.1081 +                     test_bit(n, to_send) &&
 27.1082 +                     test_bit(n, to_skip) )
 27.1083 +                    skip_this_iter++; /* stats keeping */
 27.1084 +
 27.1085 +                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
 27.1086 +                       (test_bit(n, to_send) && last_iter) ||
 27.1087 +                       (test_bit(n, to_fix)  && last_iter)) )
 27.1088 +                    continue;
 27.1089 +
 27.1090 +                /* Skip PFNs that aren't really there */
 27.1091 +                if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */
 27.1092 +                             || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) 
 27.1093 +                                 && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ )
 27.1094 +                    continue;
 27.1095 +
 27.1096 +                /*
 27.1097 +                ** we get here if:
 27.1098 +                **  1. page is marked to_send & hasn't already been re-dirtied
 27.1099 +                **  2. (ignore to_skip in last iteration)
 27.1100 +                **  3. add in pages that still need fixup (net bufs)
 27.1101 +                */
 27.1102 +
 27.1103 +                pfn_batch[batch] = n;
 27.1104 +
 27.1105 +                /* Hypercall interfaces operate in PFNs for HVM guests
 27.1106 +                * and MFNs for PV guests */
 27.1107 +                if ( hvm ) 
 27.1108 +                    pfn_type[batch] = n;
 27.1109 +                else
 27.1110 +                    pfn_type[batch] = live_p2m[n];
 27.1111 +                    
 27.1112 +                if ( !is_mapped(pfn_type[batch]) )
 27.1113 +                {
 27.1114 +                    /*
 27.1115 +                    ** not currently in psuedo-physical map -- set bit
 27.1116 +                    ** in to_fix since we must send this page in last_iter
 27.1117 +                    ** unless its sent sooner anyhow, or it never enters
 27.1118 +                    ** pseudo-physical map (e.g. for ballooned down doms)
 27.1119 +                    */
 27.1120 +                    set_bit(n, to_fix);
 27.1121 +                    continue;
 27.1122 +                }
 27.1123 +
 27.1124 +                if ( last_iter &&
 27.1125 +                     test_bit(n, to_fix) &&
 27.1126 +                     !test_bit(n, to_send) )
 27.1127 +                {
 27.1128 +                    needed_to_fix++;
 27.1129 +                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
 27.1130 +                            iter, n, pfn_type[batch]);
 27.1131 +                }
 27.1132 +                
 27.1133 +                clear_bit(n, to_fix);
 27.1134 +                
 27.1135 +                batch++;
 27.1136 +            }
 27.1137 +
 27.1138 +            if ( batch == 0 )
 27.1139 +                goto skip; /* vanishingly unlikely... */
 27.1140 +
 27.1141 +            region_base = xc_map_foreign_batch(
 27.1142 +                xc_handle, dom, PROT_READ, pfn_type, batch);
 27.1143 +            if ( region_base == NULL )
 27.1144 +            {
 27.1145 +                ERROR("map batch failed");
 27.1146 +                goto out;
 27.1147 +            }
 27.1148 +
 27.1149 +            if ( !hvm )
 27.1150 +            {
 27.1151 +                /* Get page types */
 27.1152 +                for ( j = 0; j < batch; j++ )
 27.1153 +                    ((uint32_t *)pfn_type)[j] = pfn_type[j];
 27.1154 +                if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
 27.1155 +                                           (uint32_t *)pfn_type) )
 27.1156 +                {
 27.1157 +                    ERROR("get_pfn_type_batch failed");
 27.1158 +                    goto out;
 27.1159 +                }
 27.1160 +                for ( j = batch-1; j >= 0; j-- )
 27.1161 +                    pfn_type[j] = ((uint32_t *)pfn_type)[j];
 27.1162 +
 27.1163 +                for ( j = 0; j < batch; j++ )
 27.1164 +                {
 27.1165 +                    
 27.1166 +                    if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
 27.1167 +                         XEN_DOMCTL_PFINFO_XTAB )
 27.1168 +                    {
 27.1169 +                        DPRINTF("type fail: page %i mfn %08lx\n", 
 27.1170 +                                j, pfn_type[j]);
 27.1171 +                        continue;
 27.1172 +                    }
 27.1173 +                    
 27.1174 +                    if ( debug )
 27.1175 +                        DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
 27.1176 +                                " sum= %08lx\n",
 27.1177 +                                iter,
 27.1178 +                                (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 27.1179 +                                pfn_batch[j],
 27.1180 +                                pfn_type[j],
 27.1181 +                                mfn_to_pfn(pfn_type[j] &
 27.1182 +                                           ~XEN_DOMCTL_PFINFO_LTAB_MASK),
 27.1183 +                                csum_page(region_base + (PAGE_SIZE*j)));
 27.1184 +                    
 27.1185 +                    /* canonicalise mfn->pfn */
 27.1186 +                    pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 27.1187 +                        pfn_batch[j];
 27.1188 +                }
 27.1189 +            }
 27.1190 +
 27.1191 +            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
 27.1192 +            {
 27.1193 +                ERROR("Error when writing to state file (2) (errno %d)",
 27.1194 +                      errno);
 27.1195 +                goto out;
 27.1196 +            }
 27.1197 +
 27.1198 +            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
 27.1199 +            {
 27.1200 +                ERROR("Error when writing to state file (3) (errno %d)",
 27.1201 +                      errno);
 27.1202 +                goto out;
 27.1203 +            }
 27.1204 +
 27.1205 +            /* entering this loop, pfn_type is now in pfns (Not mfns) */
 27.1206 +            for ( j = 0; j < batch; j++ )
 27.1207 +            {
 27.1208 +                unsigned long pfn, pagetype;
 27.1209 +                void *spage = (char *)region_base + (PAGE_SIZE*j);
 27.1210 +
 27.1211 +                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
 27.1212 +                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
 27.1213 +
 27.1214 +                /* write out pages in batch */
 27.1215 +                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
 27.1216 +                    continue;
 27.1217 +
 27.1218 +                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 27.1219 +
 27.1220 +                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
 27.1221 +                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
 27.1222 +                {
 27.1223 +                    /* We have a pagetable page: need to rewrite it. */
 27.1224 +                    race = 
 27.1225 +                        canonicalize_pagetable(pagetype, pfn, spage, page); 
 27.1226 +
 27.1227 +                    if ( race && !live )
 27.1228 +                    {
 27.1229 +                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
 27.1230 +                              pagetype);
 27.1231 +                        goto out;
 27.1232 +                    }
 27.1233 +
 27.1234 +                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
 27.1235 +                    {
 27.1236 +                        ERROR("Error when writing to state file (4)"
 27.1237 +                              " (errno %d)", errno);
 27.1238 +                        goto out;
 27.1239 +                    }
 27.1240 +                }
 27.1241 +                else
 27.1242 +                {
 27.1243 +                    /* We have a normal page: just write it directly. */
 27.1244 +                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
 27.1245 +                         PAGE_SIZE )
 27.1246 +                    {
 27.1247 +                        ERROR("Error when writing to state file (5)"
 27.1248 +                              " (errno %d)", errno);
 27.1249 +                        goto out;
 27.1250 +                    }
 27.1251 +                }
 27.1252 +            } /* end of the write out for this batch */
 27.1253 +
 27.1254 +            sent_this_iter += batch;
 27.1255 +
 27.1256 +            munmap(region_base, batch*PAGE_SIZE);
 27.1257 +
 27.1258 +        } /* end of this while loop for this iteration */
 27.1259 +
 27.1260 +      skip:
 27.1261 +
 27.1262 +        total_sent += sent_this_iter;
 27.1263 +
 27.1264 +        DPRINTF("\r %d: sent %d, skipped %d, ",
 27.1265 +                iter, sent_this_iter, skip_this_iter );
 27.1266 +
 27.1267 +        if ( last_iter )
 27.1268 +        {
 27.1269 +            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 27.1270 +
 27.1271 +            DPRINTF("Total pages sent= %ld (%.2fx)\n",
 27.1272 +                    total_sent, ((float)total_sent)/p2m_size );
 27.1273 +            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
 27.1274 +        }
 27.1275 +
 27.1276 +        if ( last_iter && debug )
 27.1277 +        {
 27.1278 +            int minusone = -1;
 27.1279 +            memset(to_send, 0xff, BITMAP_SIZE);
 27.1280 +            debug = 0;
 27.1281 +            DPRINTF("Entering debug resend-all mode\n");
 27.1282 +
 27.1283 +            /* send "-1" to put receiver into debug mode */
 27.1284 +            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
 27.1285 +            {
 27.1286 +                ERROR("Error when writing to state file (6) (errno %d)",
 27.1287 +                      errno);
 27.1288 +                goto out;
 27.1289 +            }
 27.1290 +
 27.1291 +            continue;
 27.1292 +        }
 27.1293 +
 27.1294 +        if ( last_iter )
 27.1295 +            break;
 27.1296 +
 27.1297 +        if ( live )
 27.1298 +        {
 27.1299 +            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
 27.1300 +                 (iter >= max_iters) ||
 27.1301 +                 (sent_this_iter+skip_this_iter < 50) ||
 27.1302 +                 (total_sent > p2m_size*max_factor) )
 27.1303 +            {
 27.1304 +                DPRINTF("Start last iteration\n");
 27.1305 +                last_iter = 1;
 27.1306 +
 27.1307 +                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
 27.1308 +                {
 27.1309 +                    ERROR("Domain appears not to have suspended");
 27.1310 +                    goto out;
 27.1311 +                }
 27.1312 +
 27.1313 +                DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
 27.1314 +            }
 27.1315 +
 27.1316 +            if ( xc_shadow_control(xc_handle, dom, 
 27.1317 +                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
 27.1318 +                                   p2m_size, NULL, 0, &stats) != p2m_size )
 27.1319 +            {
 27.1320 +                ERROR("Error flushing shadow PT");
 27.1321 +                goto out;
 27.1322 +            }
 27.1323 +
 27.1324 +            if ( hvm ) 
 27.1325 +            {
 27.1326 +                /* Pull in the dirty bits from qemu-dm too */
 27.1327 +                if ( !last_iter )
 27.1328 +                {
 27.1329 +                    qemu_active = qemu_non_active;
 27.1330 +                    qemu_non_active = qemu_active ? 0 : 1;
 27.1331 +                    qemu_flip_buffer(dom, qemu_active);
 27.1332 +                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
 27.1333 +                    {
 27.1334 +                        to_send[j] |= qemu_bitmaps[qemu_non_active][j];
 27.1335 +                        qemu_bitmaps[qemu_non_active][j] = 0;
 27.1336 +                    }
 27.1337 +                }
 27.1338 +                else
 27.1339 +                {
 27.1340 +                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
 27.1341 +                        to_send[j] |= qemu_bitmaps[qemu_active][j];
 27.1342 +                }
 27.1343 +            }
 27.1344 +
 27.1345 +            sent_last_iter = sent_this_iter;
 27.1346 +
 27.1347 +            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
 27.1348 +
 27.1349 +        }
 27.1350 +    } /* end of infinite for loop */
 27.1351 +
 27.1352 +    DPRINTF("All memory is saved\n");
 27.1353 +
 27.1354 +    {
 27.1355 +        struct {
 27.1356 +            int minustwo;
 27.1357 +            int max_vcpu_id;
 27.1358 +            uint64_t vcpumap;
 27.1359 +        } chunk = { -2, info.max_vcpu_id };
 27.1360 +
 27.1361 +        if ( info.max_vcpu_id >= 64 )
 27.1362 +        {
 27.1363 +            ERROR("Too many VCPUS in guest!");
 27.1364 +            goto out;
 27.1365 +        }
 27.1366 +
 27.1367 +        for ( i = 1; i <= info.max_vcpu_id; i++ )
 27.1368 +        {
 27.1369 +            xc_vcpuinfo_t vinfo;
 27.1370 +            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
 27.1371 +                 vinfo.online )
 27.1372 +                vcpumap |= 1ULL << i;
 27.1373 +        }
 27.1374 +
 27.1375 +        chunk.vcpumap = vcpumap;
 27.1376 +        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
 27.1377 +        {
 27.1378 +            ERROR("Error when writing to state file (errno %d)", errno);
 27.1379 +            goto out;
 27.1380 +        }
 27.1381 +    }
 27.1382 +
 27.1383 +    /* Zero terminate */
 27.1384 +    i = 0;
 27.1385 +    if ( !write_exact(io_fd, &i, sizeof(int)) )
 27.1386 +    {
 27.1387 +        ERROR("Error when writing to state file (6') (errno %d)", errno);
 27.1388 +        goto out;
 27.1389 +    }
 27.1390 +
 27.1391 +    if ( hvm ) 
 27.1392 +    {
 27.1393 +        uint32_t rec_size;
 27.1394 +
 27.1395 +        /* Save magic-page locations. */
 27.1396 +        memset(magic_pfns, 0, sizeof(magic_pfns));
 27.1397 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
 27.1398 +                         (unsigned long *)&magic_pfns[0]);
 27.1399 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
 27.1400 +                         (unsigned long *)&magic_pfns[1]);
 27.1401 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
 27.1402 +                         (unsigned long *)&magic_pfns[2]);
 27.1403 +        if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
 27.1404 +        {
 27.1405 +            ERROR("Error when writing to state file (7)");
 27.1406 +            goto out;
 27.1407 +        }
 27.1408 +
 27.1409 +        /* Get HVM context from Xen and save it too */
 27.1410 +        if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, 
 27.1411 +                                                  hvm_buf_size)) == -1 )
 27.1412 +        {
 27.1413 +            ERROR("HVM:Could not get hvm buffer");
 27.1414 +            goto out;
 27.1415 +        }
 27.1416 +        
 27.1417 +        if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
 27.1418 +        {
 27.1419 +            ERROR("error write hvm buffer size");
 27.1420 +            goto out;
 27.1421 +        }
 27.1422 +        
 27.1423 +        if ( !write_exact(io_fd, hvm_buf, rec_size) )
 27.1424 +        {
 27.1425 +            ERROR("write HVM info failed!\n");
 27.1426 +            goto out;
 27.1427 +        }
 27.1428 +        
 27.1429 +        /* HVM guests are done now */
 27.1430 +        rc = 0;
 27.1431 +        goto out;
 27.1432 +    }
 27.1433 +
 27.1434 +    /* PV guests only from now on */
 27.1435 +
 27.1436 +    /* Send through a list of all the PFNs that were not in map at the close */
 27.1437 +    {
 27.1438 +        unsigned int i,j;
 27.1439 +        unsigned long pfntab[1024];
 27.1440 +
 27.1441 +        for ( i = 0, j = 0; i < p2m_size; i++ )
 27.1442 +        {
 27.1443 +            if ( !is_mapped(live_p2m[i]) )
 27.1444 +                j++;
 27.1445 +        }
 27.1446 +
 27.1447 +        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
 27.1448 +        {
 27.1449 +            ERROR("Error when writing to state file (6a) (errno %d)", errno);
 27.1450 +            goto out;
 27.1451 +        }
 27.1452 +
 27.1453 +        for ( i = 0, j = 0; i < p2m_size; )
 27.1454 +        {
 27.1455 +            if ( !is_mapped(live_p2m[i]) )
 27.1456 +                pfntab[j++] = i;
 27.1457 +
 27.1458 +            i++;
 27.1459 +            if ( (j == 1024) || (i == p2m_size) )
 27.1460 +            {
 27.1461 +                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
 27.1462 +                {
 27.1463 +                    ERROR("Error when writing to state file (6b) (errno %d)",
 27.1464 +                          errno);
 27.1465 +                    goto out;
 27.1466 +                }
 27.1467 +                j = 0;
 27.1468 +            }
 27.1469 +        }
 27.1470 +    }
 27.1471 +
 27.1472 +    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
 27.1473 +    {
 27.1474 +        ERROR("Could not get vcpu context");
 27.1475 +        goto out;
 27.1476 +    }
 27.1477 +
 27.1478 +    /* Canonicalise the suspend-record frame number. */
 27.1479 +    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
 27.1480 +    {
 27.1481 +        ERROR("Suspend record is not in range of pseudophys map");
 27.1482 +        goto out;
 27.1483 +    }
 27.1484 +
 27.1485 +    for ( i = 0; i <= info.max_vcpu_id; i++ )
 27.1486 +    {
 27.1487 +        if ( !(vcpumap & (1ULL << i)) )
 27.1488 +            continue;
 27.1489 +
 27.1490 +        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
 27.1491 +        {
 27.1492 +            ERROR("No context for VCPU%d", i);
 27.1493 +            goto out;
 27.1494 +        }
 27.1495 +
 27.1496 +        /* Canonicalise each GDT frame number. */
 27.1497 +        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
 27.1498 +        {
 27.1499 +            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
 27.1500 +            {
 27.1501 +                ERROR("GDT frame is not in range of pseudophys map");
 27.1502 +                goto out;
 27.1503 +            }
 27.1504 +        }
 27.1505 +
 27.1506 +        /* Canonicalise the page table base pointer. */
 27.1507 +        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
 27.1508 +        {
 27.1509 +            ERROR("PT base is not in range of pseudophys map");
 27.1510 +            goto out;
 27.1511 +        }
 27.1512 +        ctxt.ctrlreg[3] = 
 27.1513 +            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 27.1514 +
 27.1515 +        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
 27.1516 +        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
 27.1517 +        {
 27.1518 +            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
 27.1519 +            {
 27.1520 +                ERROR("PT base is not in range of pseudophys map");
 27.1521 +                goto out;
 27.1522 +            }
 27.1523 +            /* Least-significant bit means 'valid PFN'. */
 27.1524 +            ctxt.ctrlreg[1] = 1 |
 27.1525 +                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
 27.1526 +        }
 27.1527 +
 27.1528 +        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
 27.1529 +        {
 27.1530 +            ERROR("Error when writing to state file (1) (errno %d)", errno);
 27.1531 +            goto out;
 27.1532 +        }
 27.1533 +    }
 27.1534 +
 27.1535 +    /*
 27.1536 +     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
 27.1537 +     */
 27.1538 +    memcpy(page, live_shinfo, PAGE_SIZE);
 27.1539 +    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
 27.1540 +    if ( !write_exact(io_fd, page, PAGE_SIZE) )
 27.1541 +    {
 27.1542 +        ERROR("Error when writing to state file (1) (errno %d)", errno);
 27.1543 +        goto out;
 27.1544 +    }
 27.1545 +
 27.1546 +    /* Success! */
 27.1547 +    rc = 0;
 27.1548 +
 27.1549 + out:
 27.1550 +
 27.1551 +    if ( live )
 27.1552 +    {
 27.1553 +        if ( xc_shadow_control(xc_handle, dom, 
 27.1554 +                               XEN_DOMCTL_SHADOW_OP_OFF,
 27.1555 +                               NULL, 0, NULL, 0, NULL) < 0 )
 27.1556 +            DPRINTF("Warning - couldn't disable shadow mode");
 27.1557 +    }
 27.1558 +
 27.1559 +    /* Flush last write and discard cache for file. */
 27.1560 +    discard_file_cache(io_fd, 1 /* flush */);
 27.1561 +
 27.1562 +    if ( live_shinfo )
 27.1563 +        munmap(live_shinfo, PAGE_SIZE);
 27.1564 +
 27.1565 +    if ( live_p2m )
 27.1566 +        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 27.1567 +
 27.1568 +    if ( live_m2p )
 27.1569 +        munmap(live_m2p, M2P_SIZE(max_mfn));
 27.1570 +
 27.1571 +    free(pfn_type);
 27.1572 +    free(pfn_batch);
 27.1573 +    free(to_send);
 27.1574 +    free(to_fix);
 27.1575 +    free(to_skip);
 27.1576 +
 27.1577 +    DPRINTF("Save exit rc=%d\n",rc);
 27.1578 +
 27.1579 +    return !!rc;
 27.1580 +}
 27.1581 +
 27.1582 +/*
 27.1583 + * Local variables:
 27.1584 + * mode: C
 27.1585 + * c-set-style: "BSD"
 27.1586 + * c-basic-offset: 4
 27.1587 + * tab-width: 4
 27.1588 + * indent-tabs-mode: nil
 27.1589 + * End:
 27.1590 + */
    28.1 --- a/tools/libxc/xc_hvm_build.c	Thu Apr 12 16:37:32 2007 -0500
    28.2 +++ b/tools/libxc/xc_hvm_build.c	Fri Apr 13 11:14:26 2007 +0100
    28.3 @@ -29,47 +29,6 @@ typedef union
    28.4      vcpu_guest_context_t c;
    28.5  } vcpu_guest_context_either_t;
    28.6  
    28.7 -
    28.8 -int xc_set_hvm_param(
    28.9 -    int handle, domid_t dom, int param, unsigned long value)
   28.10 -{
   28.11 -    DECLARE_HYPERCALL;
   28.12 -    xen_hvm_param_t arg;
   28.13 -    int rc;
   28.14 -
   28.15 -    hypercall.op     = __HYPERVISOR_hvm_op;
   28.16 -    hypercall.arg[0] = HVMOP_set_param;
   28.17 -    hypercall.arg[1] = (unsigned long)&arg;
   28.18 -    arg.domid = dom;
   28.19 -    arg.index = param;
   28.20 -    arg.value = value;
   28.21 -    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   28.22 -        return -1;
   28.23 -    rc = do_xen_hypercall(handle, &hypercall);
   28.24 -    unlock_pages(&arg, sizeof(arg));
   28.25 -    return rc;
   28.26 -}
   28.27 -
   28.28 -int xc_get_hvm_param(
   28.29 -    int handle, domid_t dom, int param, unsigned long *value)
   28.30 -{
   28.31 -    DECLARE_HYPERCALL;
   28.32 -    xen_hvm_param_t arg;
   28.33 -    int rc;
   28.34 -
   28.35 -    hypercall.op     = __HYPERVISOR_hvm_op;
   28.36 -    hypercall.arg[0] = HVMOP_get_param;
   28.37 -    hypercall.arg[1] = (unsigned long)&arg;
   28.38 -    arg.domid = dom;
   28.39 -    arg.index = param;
   28.40 -    if ( lock_pages(&arg, sizeof(arg)) != 0 )
   28.41 -        return -1;
   28.42 -    rc = do_xen_hypercall(handle, &hypercall);
   28.43 -    unlock_pages(&arg, sizeof(arg));
   28.44 -    *value = arg.value;
   28.45 -    return rc;
   28.46 -}
   28.47 -
   28.48  static void build_e820map(void *e820_page, unsigned long long mem_size)
   28.49  {
   28.50      struct e820entry *e820entry =
    29.1 --- a/tools/libxc/xc_hvm_save.c	Thu Apr 12 16:37:32 2007 -0500
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,755 +0,0 @@
    29.4 -/******************************************************************************
    29.5 - * xc_hvm_save.c
    29.6 - *
    29.7 - * Save the state of a running HVM guest.
    29.8 - *
    29.9 - * Copyright (c) 2003, K A Fraser.
   29.10 - * Copyright (c) 2006 Intel Corperation
   29.11 - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@intel.com>
   29.12 - *
   29.13 - * This program is free software; you can redistribute it and/or modify it
   29.14 - * under the terms and conditions of the GNU General Public License,
   29.15 - * version 2, as published by the Free Software Foundation.
   29.16 - *
   29.17 - * This program is distributed in the hope it will be useful, but WITHOUT
   29.18 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   29.19 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   29.20 - * more details.
   29.21 - *
   29.22 - * You should have received a copy of the GNU General Public License along with
   29.23 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   29.24 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   29.25 - *
   29.26 - */
   29.27 -
   29.28 -#include <inttypes.h>
   29.29 -#include <time.h>
   29.30 -#include <stdlib.h>
   29.31 -#include <unistd.h>
   29.32 -#include <sys/time.h>
   29.33 -
   29.34 -#include "xc_private.h"
   29.35 -#include "xg_private.h"
   29.36 -#include "xg_save_restore.h"
   29.37 -
   29.38 -#include <xen/hvm/e820.h>
   29.39 -#include <xen/hvm/params.h>
   29.40 -
   29.41 -/*
   29.42 -** Default values for important tuning parameters. Can override by passing
   29.43 -** non-zero replacement values to xc_hvm_save().
   29.44 -**
   29.45 -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   29.46 -**
   29.47 -*/
   29.48 -#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   29.49 -#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
   29.50 -
   29.51 -/* Shared-memory bitmaps for getting log-dirty bits from qemu */
   29.52 -static unsigned long *qemu_bitmaps[2];
   29.53 -static int qemu_active;
   29.54 -static int qemu_non_active;
   29.55 -
   29.56 -/*
   29.57 -** During (live) save/migrate, we maintain a number of bitmaps to track
   29.58 -** which pages we have to send, to fixup, and to skip.
   29.59 -*/
   29.60 -
   29.61 -#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   29.62 -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
   29.63 -#define BITMAP_SIZE   (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long))
   29.64 -
   29.65 -#define BITMAP_ENTRY(_nr,_bmap) \
   29.66 -   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   29.67 -
   29.68 -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   29.69 -
   29.70 -static inline int test_bit (int nr, volatile void * addr)
   29.71 -{
   29.72 -    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
   29.73 -}
   29.74 -
   29.75 -static inline void clear_bit (int nr, volatile void * addr)
   29.76 -{
   29.77 -    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
   29.78 -}
   29.79 -
   29.80 -static inline int permute( int i, int nr, int order_nr  )
   29.81 -{
   29.82 -    /* Need a simple permutation function so that we scan pages in a
   29.83 -       pseudo random order, enabling us to get a better estimate of
   29.84 -       the domain's page dirtying rate as we go (there are often
   29.85 -       contiguous ranges of pfns that have similar behaviour, and we
   29.86 -       want to mix them up. */
   29.87 -
   29.88 -    /* e.g. nr->oder 15->4 16->4 17->5 */
   29.89 -    /* 512MB domain, 128k pages, order 17 */
   29.90 -
   29.91 -    /*
   29.92 -      QPONMLKJIHGFEDCBA
   29.93 -             QPONMLKJIH
   29.94 -      GFEDCBA
   29.95 -     */
   29.96 -
   29.97 -    /*
   29.98 -      QPONMLKJIHGFEDCBA
   29.99 -                  EDCBA
  29.100 -             QPONM
  29.101 -      LKJIHGF
  29.102 -      */
  29.103 -
  29.104 -    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  29.105 -    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  29.106 -
  29.107 -    return i;
  29.108 -}
  29.109 -
  29.110 -
  29.111 -static uint64_t tv_to_us(struct timeval *new)
  29.112 -{
  29.113 -    return (new->tv_sec * 1000000) + new->tv_usec;
  29.114 -}
  29.115 -
  29.116 -static uint64_t llgettimeofday(void)
  29.117 -{
  29.118 -    struct timeval now;
  29.119 -    gettimeofday(&now, NULL);
  29.120 -    return tv_to_us(&now);
  29.121 -}
  29.122 -
  29.123 -static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  29.124 -{
  29.125 -    return (((new->tv_sec - old->tv_sec)*1000000) +
  29.126 -            (new->tv_usec - old->tv_usec));
  29.127 -}
  29.128 -
  29.129 -
  29.130 -#define RATE_IS_MAX() (0)
  29.131 -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
  29.132 -#define initialize_mbit_rate()
  29.133 -
  29.134 -static inline ssize_t write_exact(int fd, void *buf, size_t count)
  29.135 -{
  29.136 -    return (write(fd, buf, count) == count);
  29.137 -}
  29.138 -
  29.139 -static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  29.140 -                       xc_shadow_op_stats_t *stats, int print)
  29.141 -{
  29.142 -    static struct timeval wall_last;
  29.143 -    static long long      d0_cpu_last;
  29.144 -    static long long      d1_cpu_last;
  29.145 -
  29.146 -    struct timeval        wall_now;
  29.147 -    long long             wall_delta;
  29.148 -    long long             d0_cpu_now, d0_cpu_delta;
  29.149 -    long long             d1_cpu_now, d1_cpu_delta;
  29.150 -
  29.151 -    gettimeofday(&wall_now, NULL);
  29.152 -
  29.153 -    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
  29.154 -    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
  29.155 -
  29.156 -    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  29.157 -        DPRINTF("ARRHHH!!\n");
  29.158 -
  29.159 -    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  29.160 -    if ( wall_delta == 0 )
  29.161 -        wall_delta = 1;
  29.162 -
  29.163 -    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
  29.164 -    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
  29.165 -
  29.166 -    if ( print )
  29.167 -        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  29.168 -                "dirtied %dMb/s %" PRId32 " pages\n",
  29.169 -                wall_delta,
  29.170 -                (int)((d0_cpu_delta*100)/wall_delta),
  29.171 -                (int)((d1_cpu_delta*100)/wall_delta),
  29.172 -                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  29.173 -                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  29.174 -                stats->dirty_count);
  29.175 -
  29.176 -    d0_cpu_last = d0_cpu_now;
  29.177 -    d1_cpu_last = d1_cpu_now;
  29.178 -    wall_last   = wall_now;
  29.179 -
  29.180 -    return 0;
  29.181 -}
  29.182 -
  29.183 -static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size,
  29.184 -                          unsigned long *arr, int runs)
  29.185 -{
  29.186 -    long long start, now;
  29.187 -    xc_shadow_op_stats_t stats;
  29.188 -    int j;
  29.189 -
  29.190 -    start = llgettimeofday();
  29.191 -
  29.192 -    for ( j = 0; j < runs; j++ )
  29.193 -    {
  29.194 -        int i;
  29.195 -
  29.196 -        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  29.197 -                          arr, pfn_array_size, NULL, 0, NULL);
  29.198 -        DPRINTF("#Flush\n");
  29.199 -        for ( i = 0; i < 40; i++ )
  29.200 -        {
  29.201 -            usleep(50000);
  29.202 -            now = llgettimeofday();
  29.203 -            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  29.204 -                              NULL, 0, NULL, 0, &stats);
  29.205 -            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  29.206 -                    ((now-start)+500)/1000,
  29.207 -                    stats.fault_count, stats.dirty_count);
  29.208 -        }
  29.209 -    }
  29.210 -
  29.211 -    return -1;
  29.212 -}
  29.213 -
  29.214 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  29.215 -                             int dom, xc_dominfo_t *info,
  29.216 -                             vcpu_guest_context_t *ctxt)
  29.217 -{
  29.218 -    int i = 0;
  29.219 -
  29.220 -    if ( !(*suspend)(dom) )
  29.221 -    {
  29.222 -        ERROR("Suspend request failed");
  29.223 -        return -1;
  29.224 -    }
  29.225 -
  29.226 - retry:
  29.227 -
  29.228 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  29.229 -    {
  29.230 -        ERROR("Could not get domain info");
  29.231 -        return -1;
  29.232 -    }
  29.233 -
  29.234 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
  29.235 -        ERROR("Could not get vcpu context");
  29.236 -
  29.237 -    if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) )
  29.238 -        return 0; /* success */
  29.239 -
  29.240 -    if ( info->paused )
  29.241 -    {
  29.242 -        /* Try unpausing domain, wait, and retest. */
  29.243 -        xc_domain_unpause( xc_handle, dom );
  29.244 -        ERROR("Domain was paused. Wait and re-test.");
  29.245 -        usleep(10000);  /* 10ms */
  29.246 -        goto retry;
  29.247 -    }
  29.248 -
  29.249 -    if ( ++i < 100 )
  29.250 -    {
  29.251 -        ERROR("Retry suspend domain.");
  29.252 -        usleep(10000); /* 10ms */
  29.253 -        goto retry;
  29.254 -    }
  29.255 -
  29.256 -    ERROR("Unable to suspend domain.");
  29.257 -
  29.258 -    return -1;
  29.259 -}
  29.260 -
  29.261 -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  29.262 -                uint32_t max_factor, uint32_t flags, int (*suspend)(int),
  29.263 -                void *(*init_qemu_maps)(int, unsigned), 
  29.264 -                void (*qemu_flip_buffer)(int, int))
  29.265 -{
  29.266 -    xc_dominfo_t info;
  29.267 -
  29.268 -    int rc = 1, i, j, last_iter, iter = 0;
  29.269 -    int live  = !!(flags & XCFLAGS_LIVE);
  29.270 -    int debug = !!(flags & XCFLAGS_DEBUG);
  29.271 -    int sent_last_iter, skip_this_iter;
  29.272 -
  29.273 -    /* The highest guest-physical frame number used by the current guest */
  29.274 -    unsigned long max_pfn;
  29.275 -
  29.276 -    /* The size of an array big enough to contain all guest pfns */
  29.277 -    unsigned long pfn_array_size;
  29.278 -
  29.279 -    /* Magic frames: ioreqs and xenstore comms. */
  29.280 -    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
  29.281 -
  29.282 -    /* A copy of the CPU context of the guest. */
  29.283 -    vcpu_guest_context_t ctxt;
  29.284 -
  29.285 -    /* A table containg the PFNs (/not/ MFN!) to map. */
  29.286 -    xen_pfn_t *pfn_batch = NULL;
  29.287 -
  29.288 -    /* A copy of hvm domain context buffer*/
  29.289 -    uint32_t hvm_buf_size;
  29.290 -    uint8_t *hvm_buf = NULL;
  29.291 -
  29.292 -    /* base of the region in which domain memory is mapped */
  29.293 -    unsigned char *region_base = NULL;
  29.294 -
  29.295 -    uint32_t rec_size, nr_vcpus;
  29.296 -
  29.297 -    /* power of 2 order of pfn_array_size */
  29.298 -    int order_nr;
  29.299 -
  29.300 -    /* bitmap of pages:
  29.301 -       - that should be sent this iteration (unless later marked as skip);
  29.302 -       - to skip this iteration because already dirty; */
  29.303 -    unsigned long *to_send = NULL, *to_skip = NULL;
  29.304 -
  29.305 -    xc_shadow_op_stats_t stats;
  29.306 -
  29.307 -    unsigned long total_sent = 0;
  29.308 -
  29.309 -    uint64_t vcpumap = 1ULL;
  29.310 -
  29.311 -    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
  29.312 -            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
  29.313 -            live, debug);
  29.314 -    
  29.315 -    /* If no explicit control parameters given, use defaults */
  29.316 -    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  29.317 -    max_factor = max_factor ? : DEF_MAX_FACTOR;
  29.318 -
  29.319 -    initialize_mbit_rate();
  29.320 -
  29.321 -    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  29.322 -    {
  29.323 -        ERROR("HVM: Could not get domain info");
  29.324 -        return 1;
  29.325 -    }
  29.326 -    nr_vcpus = info.nr_online_vcpus;
  29.327 -
  29.328 -    if ( mlock(&ctxt, sizeof(ctxt)) )
  29.329 -    {
  29.330 -        ERROR("HVM: Unable to mlock ctxt");
  29.331 -        return 1;
  29.332 -    }
  29.333 -
  29.334 -    /* Only have to worry about vcpu 0 even for SMP */
  29.335 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  29.336 -    {
  29.337 -        ERROR("HVM: Could not get vcpu context");
  29.338 -        goto out;
  29.339 -    }
  29.340 -
  29.341 -    DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n",
  29.342 -            info.max_memkb, info.nr_pages); 
  29.343 -
  29.344 -    if ( live )
  29.345 -    {
  29.346 -        /* Live suspend. Enable log-dirty mode. */
  29.347 -        if ( xc_shadow_control(xc_handle, dom,
  29.348 -                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  29.349 -                               NULL, 0, NULL, 0, NULL) < 0 )
  29.350 -        {
  29.351 -            ERROR("Couldn't enable shadow mode");
  29.352 -            goto out;
  29.353 -        }
  29.354 -    }
  29.355 -    else
  29.356 -    {
  29.357 -        /* This is a non-live suspend. Suspend the domain .*/
  29.358 -        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
  29.359 -        {
  29.360 -            ERROR("HVM Domain appears not to have suspended");
  29.361 -            goto out;
  29.362 -        }
  29.363 -    }
  29.364 -
  29.365 -    last_iter = !live;
  29.366 -
  29.367 -    max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
  29.368 -
  29.369 -    DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, "
  29.370 -            "max_memkb=0x%lx, live=%d.\n",
  29.371 -            max_pfn, info.max_memkb, live);
  29.372 -
  29.373 -    /* Size of any array that covers 0 ... max_pfn */
  29.374 -    pfn_array_size = max_pfn + 1;
  29.375 -    if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
  29.376 -    {
  29.377 -        ERROR("Error when writing to state file (1)");
  29.378 -        goto out;
  29.379 -    }
  29.380 -
  29.381 -    /* pretend we sent all the pages last iteration */
  29.382 -    sent_last_iter = pfn_array_size;
  29.383 -
  29.384 -    /* calculate the power of 2 order of pfn_array_size, e.g.
  29.385 -       15->4 16->4 17->5 */
  29.386 -    for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  29.387 -        continue;
  29.388 -
  29.389 -    /* Setup to_send / to_fix and to_skip bitmaps */
  29.390 -    to_send = malloc(BITMAP_SIZE);
  29.391 -    to_skip = malloc(BITMAP_SIZE);
  29.392 -
  29.393 -    if ( live )
  29.394 -    {
  29.395 -        /* Get qemu-dm logging dirty pages too */
  29.396 -        void *seg = init_qemu_maps(dom, BITMAP_SIZE);
  29.397 -        qemu_bitmaps[0] = seg;
  29.398 -        qemu_bitmaps[1] = seg + BITMAP_SIZE;
  29.399 -        qemu_active = 0;
  29.400 -        qemu_non_active = 1;
  29.401 -    }
  29.402 -
  29.403 -    hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
  29.404 -    if ( hvm_buf_size == -1 )
  29.405 -    {
  29.406 -        ERROR("Couldn't get HVM context size from Xen");
  29.407 -        goto out;
  29.408 -    }
  29.409 -    hvm_buf = malloc(hvm_buf_size);
  29.410 -
  29.411 -    if ( !to_send || !to_skip || !hvm_buf )
  29.412 -    {
  29.413 -        ERROR("Couldn't allocate memory");
  29.414 -        goto out;
  29.415 -    }
  29.416 -
  29.417 -    memset(to_send, 0xff, BITMAP_SIZE);
  29.418 -
  29.419 -    if ( lock_pages(to_send, BITMAP_SIZE) )
  29.420 -    {
  29.421 -        ERROR("Unable to lock to_send");
  29.422 -        return 1;
  29.423 -    }
  29.424 -
  29.425 -    /* (to fix is local only) */
  29.426 -    if ( lock_pages(to_skip, BITMAP_SIZE) )
  29.427 -    {
  29.428 -        ERROR("Unable to lock to_skip");
  29.429 -        return 1;
  29.430 -    }
  29.431 -
  29.432 -    analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0);
  29.433 -
  29.434 -    /* We want zeroed memory so use calloc rather than malloc. */
  29.435 -    pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  29.436 -    if ( pfn_batch == NULL )
  29.437 -    {
  29.438 -        ERROR("failed to alloc memory for pfn_batch array");
  29.439 -        errno = ENOMEM;
  29.440 -        goto out;
  29.441 -    }
  29.442 -
  29.443 -    for ( ; ; )
  29.444 -    {
  29.445 -        unsigned int prev_pc, sent_this_iter, N, batch;
  29.446 -
  29.447 -        iter++;
  29.448 -        sent_this_iter = 0;
  29.449 -        skip_this_iter = 0;
  29.450 -        prev_pc = 0;
  29.451 -        N=0;
  29.452 -
  29.453 -        DPRINTF("Saving memory pages: iter %d   0%%", iter);
  29.454 -
  29.455 -        while ( N < pfn_array_size )
  29.456 -        {
  29.457 -            unsigned int this_pc = (N * 100) / pfn_array_size;
  29.458 -            int rc;
  29.459 -
  29.460 -            if ( (this_pc - prev_pc) >= 5 )
  29.461 -            {
  29.462 -                DPRINTF("\b\b\b\b%3d%%", this_pc);
  29.463 -                prev_pc = this_pc;
  29.464 -            }
  29.465 -
  29.466 -            if ( !last_iter )
  29.467 -            {
  29.468 -                /* Slightly wasteful to peek the whole array evey time,
  29.469 -                   but this is fast enough for the moment. */
  29.470 -                rc = xc_shadow_control(
  29.471 -                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
  29.472 -                    pfn_array_size, NULL, 0, NULL);
  29.473 -                if ( rc != pfn_array_size )
  29.474 -                {
  29.475 -                    ERROR("Error peeking shadow bitmap");
  29.476 -                    goto out;
  29.477 -                }
  29.478 -            }
  29.479 -
  29.480 -            /* load pfn_batch[] with the mfn of all the pages we're doing in
  29.481 -               this batch. */
  29.482 -            for ( batch = 0;
  29.483 -                  (batch < MAX_BATCH_SIZE) && (N < pfn_array_size);
  29.484 -                  N++ )
  29.485 -            {
  29.486 -                int n = permute(N, pfn_array_size, order_nr);
  29.487 -
  29.488 -                if ( 0 && debug )
  29.489 -                    DPRINTF("%d pfn= %08lx %d \n",
  29.490 -                            iter, (unsigned long)n, test_bit(n, to_send));
  29.491 -
  29.492 -                if ( !last_iter &&
  29.493 -                     test_bit(n, to_send) &&
  29.494 -                     test_bit(n, to_skip) )
  29.495 -                    skip_this_iter++; /* stats keeping */
  29.496 -
  29.497 -                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
  29.498 -                       (test_bit(n, to_send) && last_iter)) )
  29.499 -                    continue;
  29.500 -
  29.501 -                /* Skip PFNs that aren't really there */
  29.502 -                if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */
  29.503 -                     || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) &&
  29.504 -                         n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ )
  29.505 -                    continue;
  29.506 -
  29.507 -                /*
  29.508 -                ** we get here if:
  29.509 -                **  1. page is marked to_send & hasn't already been re-dirtied
  29.510 -                **  2. (ignore to_skip in last iteration)
  29.511 -                */
  29.512 -
  29.513 -                pfn_batch[batch] = n;
  29.514 -
  29.515 -                batch++;
  29.516 -            }
  29.517 -
  29.518 -            if ( batch == 0 )
  29.519 -                goto skip; /* vanishingly unlikely... */
  29.520 -
  29.521 -            region_base = xc_map_foreign_batch(
  29.522 -                xc_handle, dom, PROT_READ, pfn_batch, batch);
  29.523 -            if ( region_base == 0 )
  29.524 -            {
  29.525 -                ERROR("map batch failed");
  29.526 -                goto out;
  29.527 -            }
  29.528 -
  29.529 -            /* write num of pfns */
  29.530 -            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
  29.531 -            {
  29.532 -                ERROR("Error when writing to state file (2)");
  29.533 -                goto out;
  29.534 -            }
  29.535 -
  29.536 -            /* write all the pfns */
  29.537 -            if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) )
  29.538 -            {
  29.539 -                ERROR("Error when writing to state file (3)");
  29.540 -                goto out;
  29.541 -            }
  29.542 -
  29.543 -            for ( j = 0; j < batch; j++ )
  29.544 -            {
  29.545 -                if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK )
  29.546 -                    continue;
  29.547 -                if ( ratewrite(io_fd, region_base + j*PAGE_SIZE,
  29.548 -                               PAGE_SIZE) != PAGE_SIZE )
  29.549 -                {
  29.550 -                    ERROR("ERROR when writing to state file (4)");
  29.551 -                    goto out;
  29.552 -                }
  29.553 -            }
  29.554 -
  29.555 -            sent_this_iter += batch;
  29.556 -
  29.557 -            munmap(region_base, batch*PAGE_SIZE);
  29.558 -
  29.559 -        } /* end of this while loop for this iteration */
  29.560 -
  29.561 -      skip:
  29.562 -
  29.563 -        total_sent += sent_this_iter;
  29.564 -
  29.565 -        DPRINTF("\r %d: sent %d, skipped %d, ",
  29.566 -                iter, sent_this_iter, skip_this_iter );
  29.567 -
  29.568 -        if ( last_iter )
  29.569 -        {
  29.570 -            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
  29.571 -            DPRINTF("Total pages sent= %ld (%.2fx)\n",
  29.572 -                    total_sent, ((float)total_sent)/pfn_array_size );
  29.573 -        }
  29.574 -
  29.575 -        if ( last_iter && debug )
  29.576 -        {
  29.577 -            int minusone = -1;
  29.578 -            memset(to_send, 0xff, BITMAP_SIZE);
  29.579 -            debug = 0;
  29.580 -            DPRINTF("Entering debug resend-all mode\n");
  29.581 -
  29.582 -            /* send "-1" to put receiver into debug mode */
  29.583 -            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
  29.584 -            {
  29.585 -                ERROR("Error when writing to state file (6)");
  29.586 -                goto out;
  29.587 -            }
  29.588 -
  29.589 -            continue;
  29.590 -        }
  29.591 -
  29.592 -        if ( last_iter )
  29.593 -            break;
  29.594 -
  29.595 -        if ( live )
  29.596 -        {
  29.597 -            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
  29.598 -                 (iter >= max_iters) ||
  29.599 -                 (sent_this_iter+skip_this_iter < 50) ||
  29.600 -                 (total_sent > pfn_array_size*max_factor) )
  29.601 -            {
  29.602 -                DPRINTF("Start last iteration for HVM domain\n");
  29.603 -                last_iter = 1;
  29.604 -
  29.605 -                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
  29.606 -                                       &ctxt))
  29.607 -                {
  29.608 -                    ERROR("Domain appears not to have suspended");
  29.609 -                    goto out;
  29.610 -                }
  29.611 -
  29.612 -                DPRINTF("SUSPEND eip %08lx edx %08lx\n",
  29.613 -                        (unsigned long)ctxt.user_regs.eip,
  29.614 -                        (unsigned long)ctxt.user_regs.edx);
  29.615 -            }
  29.616 -
  29.617 -            if ( xc_shadow_control(xc_handle, dom, 
  29.618 -                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
  29.619 -                                   pfn_array_size, NULL, 
  29.620 -                                   0, &stats) != pfn_array_size )
  29.621 -            {
  29.622 -                ERROR("Error flushing shadow PT");
  29.623 -                goto out;
  29.624 -            }
  29.625 -
  29.626 -            /* Pull in the dirty bits from qemu too */
  29.627 -            if ( !last_iter )
  29.628 -            {
  29.629 -                qemu_active = qemu_non_active;
  29.630 -                qemu_non_active = qemu_active ? 0 : 1;
  29.631 -                qemu_flip_buffer(dom, qemu_active);
  29.632 -                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
  29.633 -                {
  29.634 -                    to_send[j] |= qemu_bitmaps[qemu_non_active][j];
  29.635 -                    qemu_bitmaps[qemu_non_active][j] = 0;
  29.636 -                }
  29.637 -            }
  29.638 -            else
  29.639 -            {
  29.640 -                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
  29.641 -                    to_send[j] |= qemu_bitmaps[qemu_active][j];
  29.642 -            }
  29.643 -
  29.644 -            sent_last_iter = sent_this_iter;
  29.645 -
  29.646 -            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
  29.647 -        }
  29.648 -    } /* end of while 1 */
  29.649 -
  29.650 -
  29.651 -    DPRINTF("All HVM memory is saved\n");
  29.652 -
  29.653 -    {
  29.654 -        struct {
  29.655 -            int minustwo;
  29.656 -            int max_vcpu_id;
  29.657 -            uint64_t vcpumap;
  29.658 -        } chunk = { -2, info.max_vcpu_id };
  29.659 -
  29.660 -        if (info.max_vcpu_id >= 64) {
  29.661 -            ERROR("Too many VCPUS in guest!");
  29.662 -            goto out;
  29.663 -        }
  29.664 -
  29.665 -        for (i = 1; i <= info.max_vcpu_id; i++) {
  29.666 -            xc_vcpuinfo_t vinfo;
  29.667 -            if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
  29.668 -                vinfo.online)
  29.669 -                vcpumap |= 1ULL << i;
  29.670 -        }
  29.671 -
  29.672 -        chunk.vcpumap = vcpumap;
  29.673 -        if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
  29.674 -            ERROR("Error when writing to state file (errno %d)", errno);
  29.675 -            goto out;
  29.676 -        }
  29.677 -    }
  29.678 -
  29.679 -    /* Zero terminate */
  29.680 -    i = 0;
  29.681 -    if ( !write_exact(io_fd, &i, sizeof(int)) )
  29.682 -    {
  29.683 -        ERROR("Error when writing to state file (6)");
  29.684 -        goto out;
  29.685 -    }
  29.686 -
  29.687 -    /* Save magic-page locations. */
  29.688 -    memset(magic_pfns, 0, sizeof(magic_pfns));
  29.689 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
  29.690 -                     (unsigned long *)&magic_pfns[0]);
  29.691 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
  29.692 -                     (unsigned long *)&magic_pfns[1]);
  29.693 -    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
  29.694 -                     (unsigned long *)&magic_pfns[2]);
  29.695 -    if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
  29.696 -    {
  29.697 -        ERROR("Error when writing to state file (7)");
  29.698 -        goto out;
  29.699 -    }
  29.700 -
  29.701 -    /* save vcpu/vmcs contexts */
  29.702 -    for ( i = 0; i < nr_vcpus; i++ )
  29.703 -    {
  29.704 -        if ( !(vcpumap & (1ULL << i)) )
  29.705 -            continue;
  29.706 -
  29.707 -        if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
  29.708 -        {
  29.709 -            ERROR("HVM:Could not get vcpu context");
  29.710 -            goto out;
  29.711 -        }
  29.712 -
  29.713 -        DPRINTF("write vcpu %d context.\n", i); 
  29.714 -        if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
  29.715 -        {
  29.716 -            ERROR("write vcpu context failed!\n");
  29.717 -            goto out;
  29.718 -        }
  29.719 -    }
  29.720 -
  29.721 -    if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, 
  29.722 -                                              hvm_buf_size)) == -1 )
  29.723 -    {
  29.724 -        ERROR("HVM:Could not get hvm buffer");
  29.725 -        goto out;
  29.726 -    }
  29.727 -
  29.728 -    if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
  29.729 -    {
  29.730 -        ERROR("error write hvm buffer size");
  29.731 -        goto out;
  29.732 -    }
  29.733 -
  29.734 -    if ( !write_exact(io_fd, hvm_buf, rec_size) )
  29.735 -    {
  29.736 -        ERROR("write HVM info failed!\n");
  29.737 -        goto out;
  29.738 -    }
  29.739 -
  29.740 -    /* Success! */
  29.741 -    rc = 0;
  29.742 -
  29.743 - out:
  29.744 -
  29.745 -    if ( live )
  29.746 -    {
  29.747 -        if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
  29.748 -                               NULL, 0, NULL, 0, NULL) < 0 )
  29.749 -            DPRINTF("Warning - couldn't disable shadow mode");
  29.750 -    }
  29.751 -
  29.752 -    free(hvm_buf);
  29.753 -    free(pfn_batch);
  29.754 -    free(to_send);
  29.755 -    free(to_skip);
  29.756 -
  29.757 -    return !!rc;
  29.758 -}
    30.1 --- a/tools/libxc/xc_linux_save.c	Thu Apr 12 16:37:32 2007 -0500
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,1414 +0,0 @@
    30.4 -/******************************************************************************
    30.5 - * xc_linux_save.c
    30.6 - *
    30.7 - * Save the state of a running Linux session.
    30.8 - *
    30.9 - * Copyright (c) 2003, K A Fraser.
   30.10 - */
   30.11 -
   30.12 -#include <inttypes.h>
   30.13 -#include <time.h>
   30.14 -#include <stdlib.h>
   30.15 -#include <unistd.h>
   30.16 -#include <sys/time.h>
   30.17 -
   30.18 -#include "xc_private.h"
   30.19 -#include "xc_dom.h"
   30.20 -#include "xg_private.h"
   30.21 -#include "xg_save_restore.h"
   30.22 -
   30.23 -/*
   30.24 -** Default values for important tuning parameters. Can override by passing
   30.25 -** non-zero replacement values to xc_linux_save().
   30.26 -**
   30.27 -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
   30.28 -**
   30.29 -*/
   30.30 -#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
   30.31 -#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
   30.32 -
   30.33 -/* max mfn of the whole machine */
   30.34 -static unsigned long max_mfn;
   30.35 -
   30.36 -/* virtual starting address of the hypervisor */
   30.37 -static unsigned long hvirt_start;
   30.38 -
   30.39 -/* #levels of page tables used by the current guest */
   30.40 -static unsigned int pt_levels;
   30.41 -
   30.42 -/* number of pfns this guest has (i.e. number of entries in the P2M) */
   30.43 -static unsigned long p2m_size;
   30.44 -
   30.45 -/* Live mapping of the table mapping each PFN to its current MFN. */
   30.46 -static xen_pfn_t *live_p2m = NULL;
   30.47 -
   30.48 -/* Live mapping of system MFN to PFN table. */
   30.49 -static xen_pfn_t *live_m2p = NULL;
   30.50 -static unsigned long m2p_mfn0;
   30.51 -
   30.52 -/* grep fodder: machine_to_phys */
   30.53 -
   30.54 -#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
   30.55 -
   30.56 -/*
   30.57 - * Returns TRUE if the given machine frame number has a unique mapping
   30.58 - * in the guest's pseudophysical map.
   30.59 - */
   30.60 -#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
   30.61 -    (((_mfn) < (max_mfn)) &&                    \
   30.62 -     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
   30.63 -      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
   30.64 -
   30.65 -/* Returns TRUE if MFN is successfully converted to a PFN. */
   30.66 -#define translate_mfn_to_pfn(_pmfn)                             \
   30.67 -({                                                              \
   30.68 -    unsigned long mfn = *(_pmfn);                               \
   30.69 -    int _res = 1;                                               \
   30.70 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
   30.71 -        _res = 0;                                               \
   30.72 -    else                                                        \
   30.73 -        *(_pmfn) = mfn_to_pfn(mfn);                             \
   30.74 -    _res;                                                       \
   30.75 -})
   30.76 -
   30.77 -/*
   30.78 -** During (live) save/migrate, we maintain a number of bitmaps to track
   30.79 -** which pages we have to send, to fixup, and to skip.
   30.80 -*/
   30.81 -
   30.82 -#define BITS_PER_LONG (sizeof(unsigned long) * 8)
   30.83 -#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
   30.84 -
   30.85 -#define BITMAP_ENTRY(_nr,_bmap) \
   30.86 -   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
   30.87 -
   30.88 -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
   30.89 -
   30.90 -static inline int test_bit (int nr, volatile void * addr)
   30.91 -{
   30.92 -    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
   30.93 -}
   30.94 -
   30.95 -static inline void clear_bit (int nr, volatile void * addr)
   30.96 -{
   30.97 -    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
   30.98 -}
   30.99 -
  30.100 -static inline void set_bit ( int nr, volatile void * addr)
  30.101 -{
  30.102 -    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
  30.103 -}
  30.104 -
  30.105 -/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
  30.106 -static inline unsigned int hweight32(unsigned int w)
  30.107 -{
  30.108 -    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
  30.109 -    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
  30.110 -    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
  30.111 -    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
  30.112 -    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
  30.113 -}
  30.114 -
  30.115 -static inline int count_bits ( int nr, volatile void *addr)
  30.116 -{
  30.117 -    int i, count = 0;
  30.118 -    volatile unsigned long *p = (volatile unsigned long *)addr;
  30.119 -    /* We know that the array is padded to unsigned long. */
  30.120 -    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
  30.121 -        count += hweight32(*p);
  30.122 -    return count;
  30.123 -}
  30.124 -
  30.125 -static inline int permute( int i, int nr, int order_nr  )
  30.126 -{
  30.127 -    /* Need a simple permutation function so that we scan pages in a
  30.128 -       pseudo random order, enabling us to get a better estimate of
  30.129 -       the domain's page dirtying rate as we go (there are often
  30.130 -       contiguous ranges of pfns that have similar behaviour, and we
  30.131 -       want to mix them up. */
  30.132 -
  30.133 -    /* e.g. nr->oder 15->4 16->4 17->5 */
  30.134 -    /* 512MB domain, 128k pages, order 17 */
  30.135 -
  30.136 -    /*
  30.137 -      QPONMLKJIHGFEDCBA
  30.138 -             QPONMLKJIH
  30.139 -      GFEDCBA
  30.140 -     */
  30.141 -
  30.142 -    /*
  30.143 -      QPONMLKJIHGFEDCBA
  30.144 -                  EDCBA
  30.145 -             QPONM
  30.146 -      LKJIHGF
  30.147 -      */
  30.148 -
  30.149 -    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
  30.150 -    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
  30.151 -
  30.152 -    return i;
  30.153 -}
  30.154 -
  30.155 -static uint64_t tv_to_us(struct timeval *new)
  30.156 -{
  30.157 -    return (new->tv_sec * 1000000) + new->tv_usec;
  30.158 -}
  30.159 -
  30.160 -static uint64_t llgettimeofday(void)
  30.161 -{
  30.162 -    struct timeval now;
  30.163 -    gettimeofday(&now, NULL);
  30.164 -    return tv_to_us(&now);
  30.165 -}
  30.166 -
  30.167 -static uint64_t tv_delta(struct timeval *new, struct timeval *old)
  30.168 -{
  30.169 -    return (((new->tv_sec - old->tv_sec)*1000000) +
  30.170 -            (new->tv_usec - old->tv_usec));
  30.171 -}
  30.172 -
  30.173 -static int noncached_write(int fd, int live, void *buffer, int len) 
  30.174 -{
  30.175 -    static int write_count = 0;
  30.176 -
  30.177 -    int rc = write(fd,buffer,len);
  30.178 -
  30.179 -    write_count += len;
  30.180 -    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
  30.181 -    {
  30.182 -        /* Time to discard cache - dont care if this fails */
  30.183 -        discard_file_cache(fd, 0 /* no flush */);
  30.184 -        write_count = 0;
  30.185 -    }
  30.186 -
  30.187 -    return rc;
  30.188 -}
  30.189 -
  30.190 -#ifdef ADAPTIVE_SAVE
  30.191 -
  30.192 -/*
  30.193 -** We control the rate at which we transmit (or save) to minimize impact
  30.194 -** on running domains (including the target if we're doing live migrate).
  30.195 -*/
  30.196 -
  30.197 -#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
  30.198 -#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
  30.199 -
  30.200 -/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
  30.201 -#define RATE_TO_BTU      781250
  30.202 -
  30.203 -/* Amount in bytes we allow ourselves to send in a burst */
  30.204 -#define BURST_BUDGET (100*1024)
  30.205 -
  30.206 -/* We keep track of the current and previous transmission rate */
  30.207 -static int mbit_rate, ombit_rate = 0;
  30.208 -
  30.209 -/* Have we reached the maximum transmission rate? */
  30.210 -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
  30.211 -
  30.212 -static inline void initialize_mbit_rate()
  30.213 -{
  30.214 -    mbit_rate = START_MBIT_RATE;
  30.215 -}
  30.216 -
  30.217 -static int ratewrite(int io_fd, int live, void *buf, int n)
  30.218 -{
  30.219 -    static int budget = 0;
  30.220 -    static int burst_time_us = -1;
  30.221 -    static struct timeval last_put = { 0 };
  30.222 -    struct timeval now;
  30.223 -    struct timespec delay;
  30.224 -    long long delta;
  30.225 -
  30.226 -    if ( START_MBIT_RATE == 0 )
  30.227 -        return noncached_write(io_fd, live, buf, n);
  30.228 -
  30.229 -    budget -= n;
  30.230 -    if ( budget < 0 )
  30.231 -    {
  30.232 -        if ( mbit_rate != ombit_rate )
  30.233 -        {
  30.234 -            burst_time_us = RATE_TO_BTU / mbit_rate;
  30.235 -            ombit_rate = mbit_rate;
  30.236 -            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
  30.237 -                    mbit_rate, BURST_BUDGET, burst_time_us);
  30.238 -        }
  30.239 -        if ( last_put.tv_sec == 0 )
  30.240 -        {
  30.241 -            budget += BURST_BUDGET;
  30.242 -            gettimeofday(&last_put, NULL);
  30.243 -        }
  30.244 -        else
  30.245 -        {
  30.246 -            while ( budget < 0 )
  30.247 -            {
  30.248 -                gettimeofday(&now, NULL);
  30.249 -                delta = tv_delta(&now, &last_put);
  30.250 -                while ( delta > burst_time_us )
  30.251 -                {
  30.252 -                    budget += BURST_BUDGET;
  30.253 -                    last_put.tv_usec += burst_time_us;
  30.254 -                    if ( last_put.tv_usec > 1000000 
  30.255 -                    {
  30.256 -                        last_put.tv_usec -= 1000000;
  30.257 -                        last_put.tv_sec++;
  30.258 -                    }
  30.259 -                    delta -= burst_time_us;
  30.260 -                }
  30.261 -                if ( budget > 0 )
  30.262 -                    break;
  30.263 -                delay.tv_sec = 0;
  30.264 -                delay.tv_nsec = 1000 * (burst_time_us - delta);
  30.265 -                while ( delay.tv_nsec > 0 )
  30.266 -                    if ( nanosleep(&delay, &delay) == 0 )
  30.267 -                        break;
  30.268 -            }
  30.269 -        }
  30.270 -    }
  30.271 -    return noncached_write(io_fd, live, buf, n);
  30.272 -}
  30.273 -
  30.274 -#else /* ! ADAPTIVE SAVE */
  30.275 -
  30.276 -#define RATE_IS_MAX() (0)
  30.277 -#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
  30.278 -#define initialize_mbit_rate()
  30.279 -
  30.280 -#endif
  30.281 -
  30.282 -static inline ssize_t write_exact(int fd, void *buf, size_t count)
  30.283 -{
  30.284 -    return (write(fd, buf, count) == count);
  30.285 -}
  30.286 -
  30.287 -static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
  30.288 -                       xc_shadow_op_stats_t *stats, int print)
  30.289 -{
  30.290 -    static struct timeval wall_last;
  30.291 -    static long long      d0_cpu_last;
  30.292 -    static long long      d1_cpu_last;
  30.293 -
  30.294 -    struct timeval        wall_now;
  30.295 -    long long             wall_delta;
  30.296 -    long long             d0_cpu_now, d0_cpu_delta;
  30.297 -    long long             d1_cpu_now, d1_cpu_delta;
  30.298 -
  30.299 -    gettimeofday(&wall_now, NULL);
  30.300 -
  30.301 -    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
  30.302 -    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
  30.303 -
  30.304 -    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
  30.305 -        DPRINTF("ARRHHH!!\n");
  30.306 -
  30.307 -    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
  30.308 -    if ( wall_delta == 0 )
  30.309 -        wall_delta = 1;
  30.310 -
  30.311 -    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
  30.312 -    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
  30.313 -
  30.314 -    if ( print )
  30.315 -        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
  30.316 -                "dirtied %dMb/s %" PRId32 " pages\n",
  30.317 -                wall_delta,
  30.318 -                (int)((d0_cpu_delta*100)/wall_delta),
  30.319 -                (int)((d1_cpu_delta*100)/wall_delta),
  30.320 -                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
  30.321 -                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
  30.322 -                stats->dirty_count);
  30.323 -
  30.324 -#ifdef ADAPTIVE_SAVE
  30.325 -    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
  30.326 -    {
  30.327 -        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
  30.328 -            + 50;
  30.329 -        if ( mbit_rate > MAX_MBIT_RATE )
  30.330 -            mbit_rate = MAX_MBIT_RATE;
  30.331 -    }
  30.332 -#endif
  30.333 -
  30.334 -    d0_cpu_last = d0_cpu_now;
  30.335 -    d1_cpu_last = d1_cpu_now;
  30.336 -    wall_last   = wall_now;
  30.337 -
  30.338 -    return 0;
  30.339 -}
  30.340 -
  30.341 -
  30.342 -static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
  30.343 -                          unsigned long *arr, int runs)
  30.344 -{
  30.345 -    long long start, now;
  30.346 -    xc_shadow_op_stats_t stats;
  30.347 -    int j;
  30.348 -
  30.349 -    start = llgettimeofday();
  30.350 -
  30.351 -    for ( j = 0; j < runs; j++ )
  30.352 -    {
  30.353 -        int i;
  30.354 -
  30.355 -        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
  30.356 -                          arr, p2m_size, NULL, 0, NULL);
  30.357 -        DPRINTF("#Flush\n");
  30.358 -        for ( i = 0; i < 40; i++ )
  30.359 -        {
  30.360 -            usleep(50000);
  30.361 -            now = llgettimeofday();
  30.362 -            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
  30.363 -                              NULL, 0, NULL, 0, &stats);
  30.364 -            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
  30.365 -                    ((now-start)+500)/1000,
  30.366 -                    stats.fault_count, stats.dirty_count);
  30.367 -        }
  30.368 -    }
  30.369 -
  30.370 -    return -1;
  30.371 -}
  30.372 -
  30.373 -
  30.374 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
  30.375 -                             int dom, xc_dominfo_t *info,
  30.376 -                             vcpu_guest_context_t *ctxt)
  30.377 -{
  30.378 -    int i = 0;
  30.379 -
  30.380 -    if ( !(*suspend)(dom) )
  30.381 -    {
  30.382 -        ERROR("Suspend request failed");
  30.383 -        return -1;
  30.384 -    }
  30.385 -
  30.386 - retry:
  30.387 -
  30.388 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
  30.389 -    {
  30.390 -        ERROR("Could not get domain info");
  30.391 -        return -1;
  30.392 -    }
  30.393 -
  30.394 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
  30.395 -        ERROR("Could not get vcpu context");
  30.396 -
  30.397 -
  30.398 -    if ( info->dying )
  30.399 -    {
  30.400 -        ERROR("domain is dying");
  30.401 -        return -1;
  30.402 -    }
  30.403 -
  30.404 -    if ( info->crashed )
  30.405 -    {
  30.406 -        ERROR("domain has crashed");
  30.407 -        return -1;
  30.408 -    }
  30.409 -
  30.410 -    if ( info->shutdown )
  30.411 -    {
  30.412 -        switch ( info->shutdown_reason )
  30.413 -        {
  30.414 -        case SHUTDOWN_poweroff:
  30.415 -        case SHUTDOWN_reboot:
  30.416 -            ERROR("domain has shut down");
  30.417 -            return -1;
  30.418 -        case SHUTDOWN_suspend:
  30.419 -            return 0;
  30.420 -        case SHUTDOWN_crash:
  30.421 -            ERROR("domain has crashed");
  30.422 -            return -1;
  30.423 -        }
  30.424 -    }
  30.425 -
  30.426 -    if ( info->paused )
  30.427 -    {
  30.428 -        /* Try unpausing domain, wait, and retest. */
  30.429 -        xc_domain_unpause( xc_handle, dom );
  30.430 -        ERROR("Domain was paused. Wait and re-test.");
  30.431 -        usleep(10000); /* 10ms */
  30.432 -        goto retry;
  30.433 -    }
  30.434 -
  30.435 -    if ( ++i < 100 )
  30.436 -    {
  30.437 -        ERROR("Retry suspend domain");
  30.438 -        usleep(10000); /* 10ms */
  30.439 -        goto retry;
  30.440 -    }
  30.441 -
  30.442 -    ERROR("Unable to suspend domain.");
  30.443 -
  30.444 -    return -1;
  30.445 -}
  30.446 -
  30.447 -/*
  30.448 -** Map the top-level page of MFNs from the guest. The guest might not have
  30.449 -** finished resuming from a previous restore operation, so we wait a while for
  30.450 -** it to update the MFN to a reasonable value.
  30.451 -*/
  30.452 -static void *map_frame_list_list(int xc_handle, uint32_t dom,
  30.453 -                                 shared_info_t *shinfo)
  30.454 -{
  30.455 -    int count = 100;
  30.456 -    void *p;
  30.457 -
  30.458 -    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
  30.459 -        usleep(10000);
  30.460 -
  30.461 -    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
  30.462 -    {
  30.463 -        ERROR("Timed out waiting for frame list updated.");
  30.464 -        return NULL;
  30.465 -    }
  30.466 -
  30.467 -    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
  30.468 -                             shinfo->arch.pfn_to_mfn_frame_list_list);
  30.469 -    if ( p == NULL )
  30.470 -        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
  30.471 -
  30.472 -    return p;
  30.473 -}
  30.474 -
  30.475 -/*
  30.476 -** During transfer (or in the state file), all page-table pages must be
  30.477 -** converted into a 'canonical' form where references to actual mfns
  30.478 -** are replaced with references to the corresponding pfns.
  30.479 -**
  30.480 -** This function performs the appropriate conversion, taking into account
  30.481 -** which entries do not require canonicalization (in particular, those
  30.482 -** entries which map the virtual address reserved for the hypervisor).
  30.483 -*/
  30.484 -static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
  30.485 -                           const void *spage, void *dpage)
  30.486 -{
  30.487 -
  30.488 -    int i, pte_last, xen_start, xen_end, race = 0; 
  30.489 -    uint64_t pte;
  30.490 -
  30.491 -    /*
  30.492 -    ** We need to determine which entries in this page table hold
  30.493 -    ** reserved hypervisor mappings. This depends on the current
  30.494 -    ** page table type as well as the number of paging levels.
  30.495 -    */
  30.496 -    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
  30.497 -
  30.498 -    if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  30.499 -        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
  30.500 -
  30.501 -    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
  30.502 -        xen_start = L3_PAGETABLE_ENTRIES_PAE;
  30.503 -
  30.504 -    /*
  30.505 -    ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
  30.506 -    ** We can spot this by looking for the guest linear mapping which
  30.507 -    ** Xen always ensures is present in that L2. Guests must ensure
  30.508 -    ** that this check will fail for other L2s.
  30.509 -    */
  30.510 -    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
  30.511 -    {
  30.512 -        int hstart;
  30.513 -        uint64_t he;
  30.514 -
  30.515 -        hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  30.516 -        he = ((const uint64_t *) spage)[hstart];
  30.517 -
  30.518 -        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  30.519 -        {
  30.520 -            /* hvirt starts with xen stuff... */
  30.521 -            xen_start = hstart;
  30.522 -        }
  30.523 -        else if ( hvirt_start != 0xf5800000 )
  30.524 -        {
  30.525 -            /* old L2s from before hole was shrunk... */
  30.526 -            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
  30.527 -            he = ((const uint64_t *) spage)[hstart];
  30.528 -            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
  30.529 -                xen_start = hstart;
  30.530 -        }
  30.531 -    }
  30.532 -
  30.533 -    if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
  30.534 -    {
  30.535 -        /*
  30.536 -        ** XXX SMH: should compute these from hvirt_start (which we have)
  30.537 -        ** and hvirt_end (which we don't)
  30.538 -        */
  30.539 -        xen_start = 256;
  30.540 -        xen_end   = 272;
  30.541 -    }
  30.542 -
  30.543 -    /* Now iterate through the page table, canonicalizing each PTE */
  30.544 -    for (i = 0; i < pte_last; i++ )
  30.545 -    {
  30.546 -        unsigned long pfn, mfn;
  30.547 -
  30.548 -        if ( pt_levels == 2 )
  30.549 -            pte = ((const uint32_t*)spage)[i];
  30.550 -        else
  30.551 -            pte = ((const uint64_t*)spage)[i];
  30.552 -
  30.553 -        if ( (i >= xen_start) && (i < xen_end) )
  30.554 -            pte = 0;
  30.555 -
  30.556 -        if ( pte & _PAGE_PRESENT )
  30.557 -        {
  30.558 -            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
  30.559 -            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
  30.560 -            {
  30.561 -                /* This will happen if the type info is stale which
  30.562 -                   is quite feasible under live migration */
  30.563 -                pfn  = 0;  /* zap it - we'll retransmit this page later */
  30.564 -                race = 1;  /* inform the caller of race; fatal if !live */ 
  30.565 -            }
  30.566 -            else
  30.567 -                pfn = mfn_to_pfn(mfn);
  30.568 -
  30.569 -            pte &= ~MADDR_MASK_X86;
  30.570 -            pte |= (uint64_t)pfn << PAGE_SHIFT;
  30.571 -
  30.572 -            /*
  30.573 -             * PAE guest L3Es can contain these flags when running on
  30.574 -             * a 64bit hypervisor. We zap these here to avoid any
  30.575 -             * surprise at restore time...
  30.576 -             */
  30.577 -            if ( (pt_levels == 3) &&
  30.578 -                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
  30.579 -                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
  30.580 -                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
  30.581 -        }
  30.582 -
  30.583 -        if ( pt_levels == 2 )
  30.584 -            ((uint32_t*)dpage)[i] = pte;
  30.585 -        else
  30.586 -            ((uint64_t*)dpage)[i] = pte;
  30.587 -    }
  30.588 -
  30.589 -    return race;
  30.590 -}
  30.591 -
  30.592 -static xen_pfn_t *xc_map_m2p(int xc_handle,
  30.593 -                                 unsigned long max_mfn,
  30.594 -                                 int prot)
  30.595 -{
  30.596 -    struct xen_machphys_mfn_list xmml;
  30.597 -    privcmd_mmap_entry_t *entries;
  30.598 -    unsigned long m2p_chunks, m2p_size;
  30.599 -    xen_pfn_t *m2p;
  30.600 -    xen_pfn_t *extent_start;
  30.601 -    int i, rc;
  30.602 -
  30.603 -    m2p_size   = M2P_SIZE(max_mfn);
  30.604 -    m2p_chunks = M2P_CHUNKS(max_mfn);
  30.605 -
  30.606 -    xmml.max_extents = m2p_chunks;
  30.607 -    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
  30.608 -    {
  30.609 -        ERROR("failed to allocate space for m2p mfns");
  30.610 -        return NULL;
  30.611 -    }
  30.612 -    set_xen_guest_handle(xmml.extent_start, extent_start);
  30.613 -
  30.614 -    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
  30.615 -         (xmml.nr_extents != m2p_chunks) )
  30.616 -    {
  30.617 -        ERROR("xc_get_m2p_mfns");
  30.618 -        return NULL;
  30.619 -    }
  30.620 -
  30.621 -    if ( (m2p = mmap(NULL, m2p_size, prot,
  30.622 -                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
  30.623 -    {
  30.624 -        ERROR("failed to mmap m2p");
  30.625 -        return NULL;
  30.626 -    }
  30.627 -
  30.628 -    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
  30.629 -    {
  30.630 -        ERROR("failed to allocate space for mmap entries");
  30.631 -        return NULL;
  30.632 -    }
  30.633 -
  30.634 -    for ( i = 0; i < m2p_chunks; i++ )
  30.635 -    {
  30.636 -        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
  30.637 -        entries[i].mfn = extent_start[i];
  30.638 -        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
  30.639 -    }
  30.640 -
  30.641 -    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
  30.642 -                                     entries, m2p_chunks)) < 0 )
  30.643 -    {
  30.644 -        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
  30.645 -        return NULL;
  30.646 -    }
  30.647 -
  30.648 -    m2p_mfn0 = entries[0].mfn;
  30.649 -
  30.650 -    free(extent_start);
  30.651 -    free(entries);
  30.652 -
  30.653 -    return m2p;
  30.654 -}
  30.655 -
  30.656 -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
  30.657 -                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
  30.658 -{
  30.659 -    xc_dominfo_t info;
  30.660 -
  30.661 -    int rc = 1, i, j, last_iter, iter = 0;
  30.662 -    int live  = (flags & XCFLAGS_LIVE);
  30.663 -    int debug = (flags & XCFLAGS_DEBUG);
  30.664 -    int race = 0, sent_last_iter, skip_this_iter;
  30.665 -
  30.666 -    /* The new domain's shared-info frame number. */
  30.667 -    unsigned long shared_info_frame;
  30.668 -
  30.669 -    /* A copy of the CPU context of the guest. */
  30.670 -    vcpu_guest_context_t ctxt;
  30.671 -
  30.672 -    /* A table containg the type of each PFN (/not/ MFN!). */
  30.673 -    unsigned long *pfn_type = NULL;
  30.674 -    unsigned long *pfn_batch = NULL;
  30.675 -
  30.676 -    /* A temporary mapping, and a copy, of one frame of guest memory. */
  30.677 -    char page[PAGE_SIZE];
  30.678 -
  30.679 -    /* Double and single indirect references to the live P2M table */
  30.680 -    xen_pfn_t *live_p2m_frame_list_list = NULL;
  30.681 -    xen_pfn_t *live_p2m_frame_list = NULL;
  30.682 -
  30.683 -    /* A copy of the pfn-to-mfn table frame list. */
  30.684 -    xen_pfn_t *p2m_frame_list = NULL;
  30.685 -
  30.686 -    /* Live mapping of shared info structure */
  30.687 -    shared_info_t *live_shinfo = NULL;
  30.688 -
  30.689 -    /* base of the region in which domain memory is mapped */
  30.690 -    unsigned char *region_base = NULL;
  30.691 -
  30.692 -    /* power of 2 order of p2m_size */
  30.693 -    int order_nr;
  30.694 -
  30.695 -    /* bitmap of pages:
  30.696 -       - that should be sent this iteration (unless later marked as skip);
  30.697 -       - to skip this iteration because already dirty;
  30.698 -       - to fixup by sending at the end if not already resent; */
  30.699 -    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
  30.700 -
  30.701 -    xc_shadow_op_stats_t stats;
  30.702 -
  30.703 -    unsigned long needed_to_fix = 0;
  30.704 -    unsigned long total_sent    = 0;
  30.705 -
  30.706 -    uint64_t vcpumap = 1ULL;
  30.707 -
  30.708 -    /* If no explicit control parameters given, use defaults */
  30.709 -    max_iters  = max_iters  ? : DEF_MAX_ITERS;
  30.710 -    max_factor = max_factor ? : DEF_MAX_FACTOR;
  30.711 -
  30.712 -    initialize_mbit_rate();
  30.713 -
  30.714 -    if ( !get_platform_info(xc_handle, dom,
  30.715 -                            &max_mfn, &hvirt_start, &pt_levels) )
  30.716 -    {
  30.717 -        ERROR("Unable to get platform info.");
  30.718 -        return 1;
  30.719 -    }
  30.720 -
  30.721 -    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
  30.722 -    {
  30.723 -        ERROR("Could not get domain info");
  30.724 -        return 1;
  30.725 -    }
  30.726 -
  30.727 -    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
  30.728 -    {
  30.729 -        ERROR("Could not get vcpu context");
  30.730 -        goto out;
  30.731 -    }
  30.732 -    shared_info_frame = info.shared_info_frame;
  30.733 -
  30.734 -    /* Map the shared info frame */
  30.735 -    if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  30.736 -                                              PROT_READ, shared_info_frame)) )
  30.737 -    {
  30.738 -        ERROR("Couldn't map live_shinfo");
  30.739 -        goto out;
  30.740 -    }
  30.741 -
  30.742 -    p2m_size = live_shinfo->arch.max_pfn;
  30.743 -
  30.744 -    live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
  30.745 -                                                   live_shinfo);
  30.746 -    if ( !live_p2m_frame_list_list )
  30.747 -        goto out;
  30.748 -
  30.749 -    live_p2m_frame_list =
  30.750 -        xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  30.751 -                             live_p2m_frame_list_list,
  30.752 -                             P2M_FLL_ENTRIES);
  30.753 -    if ( !live_p2m_frame_list )
  30.754 -    {
  30.755 -        ERROR("Couldn't map p2m_frame_list");
  30.756 -        goto out;
  30.757 -    }
  30.758 -
  30.759 -    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
  30.760 -       the guest must not change which frames are used for this purpose.
  30.761 -       (its not clear why it would want to change them, and we'll be OK
  30.762 -       from a safety POV anyhow. */
  30.763 -
  30.764 -    live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
  30.765 -                                    live_p2m_frame_list,
  30.766 -                                    P2M_FL_ENTRIES);
  30.767 -    if ( !live_p2m )
  30.768 -    {
  30.769 -        ERROR("Couldn't map p2m table");
  30.770 -        goto out;
  30.771 -    }
  30.772 -
  30.773 -    /* Setup the mfn_to_pfn table mapping */
  30.774 -    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
  30.775 -    {
  30.776 -        ERROR("Failed to map live M2P table");
  30.777 -        goto out;
  30.778 -    }
  30.779 -
  30.780 -
  30.781 -    /* Get a local copy of the live_P2M_frame_list */
  30.782 -    if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
  30.783 -    {
  30.784 -        ERROR("Couldn't allocate p2m_frame_list array");
  30.785 -        goto out;
  30.786 -    }
  30.787 -    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
  30.788 -
  30.789 -    /* Canonicalise the pfn-to-mfn table frame-number list. */
  30.790 -    for ( i = 0; i < p2m_size; i += fpp )
  30.791 -    {
  30.792 -        if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
  30.793 -        {
  30.794 -            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
  30.795 -            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
  30.796 -                  (uint64_t)p2m_frame_list[i/fpp]);
  30.797 -            goto out;
  30.798 -        }
  30.799 -    }
  30.800 -
  30.801 -    /* Domain is still running at this point */
  30.802 -    if ( live )
  30.803 -    {
  30.804 -        /* Live suspend. Enable log-dirty mode. */
  30.805 -        if ( xc_shadow_control(xc_handle, dom,
  30.806 -                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
  30.807 -                               NULL, 0, NULL, 0, NULL) < 0 )
  30.808 -        {
  30.809 -            ERROR("Couldn't enable shadow mode");
  30.810 -            goto out;
  30.811 -        }
  30.812 -    }
  30.813 -    else
  30.814 -    {
  30.815 -        /* This is a non-live suspend. Suspend the domain .*/
  30.816 -        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
  30.817 -        {
  30.818 -            ERROR("Domain appears not to have suspended");
  30.819 -            goto out;
  30.820 -        }
  30.821 -    }
  30.822 -
  30.823 -    last_iter = !live;
  30.824 -
  30.825 -    /* pretend we sent all the pages last iteration */
  30.826 -    sent_last_iter = p2m_size;
  30.827 -
  30.828 -    /* calculate the power of 2 order of p2m_size, e.g.
  30.829 -       15->4 16->4 17->5 */
  30.830 -    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
  30.831 -        continue;
  30.832 -
  30.833 -    /* Setup to_send / to_fix and to_skip bitmaps */
  30.834 -    to_send = malloc(BITMAP_SIZE);
  30.835 -    to_fix  = calloc(1, BITMAP_SIZE);
  30.836 -    to_skip = malloc(BITMAP_SIZE);
  30.837 -
  30.838 -    if ( !to_send || !to_fix || !to_skip )
  30.839 -    {
  30.840 -        ERROR("Couldn't allocate to_send array");
  30.841 -        goto out;
  30.842 -    }
  30.843 -
  30.844 -    memset(to_send, 0xff, BITMAP_SIZE);
  30.845 -
  30.846 -    if ( lock_pages(to_send, BITMAP_SIZE) )
  30.847 -    {
  30.848 -        ERROR("Unable to lock to_send");
  30.849 -        return 1;
  30.850 -    }
  30.851 -
  30.852 -    /* (to fix is local only) */
  30.853 -    if ( lock_pages(to_skip, BITMAP_SIZE) )
  30.854 -    {
  30.855 -        ERROR("Unable to lock to_skip");
  30.856 -        return 1;
  30.857 -    }
  30.858 -
  30.859 -    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
  30.860 -
  30.861 -    /* We want zeroed memory so use calloc rather than malloc. */
  30.862 -    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
  30.863 -    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
  30.864 -    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
  30.865 -    {
  30.866 -        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
  30.867 -        errno = ENOMEM;
  30.868 -        goto out;
  30.869 -    }
  30.870 -
  30.871 -    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
  30.872 -    {
  30.873 -        ERROR("Unable to lock");
  30.874 -        goto out;
  30.875 -    }
  30.876 -
  30.877 -    /*
  30.878 -     * Quick belt and braces sanity check.
  30.879 -     */
  30.880 -    {
  30.881 -        int err=0;
  30.882 -        unsigned long mfn;
  30.883 -        for ( i = 0; i < p2m_size; i++ )
  30.884 -        {
  30.885 -            mfn = live_p2m[i];
  30.886 -            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
  30.887 -            {
  30.888 -                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
  30.889 -                        mfn, mfn_to_pfn(mfn));
  30.890 -                err++;
  30.891 -            }
  30.892 -        }
  30.893 -        DPRINTF("Had %d unexplained entries in p2m table\n", err);
  30.894 -    }
  30.895 -
  30.896 -    /* Start writing out the saved-domain record. */
  30.897 -    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
  30.898 -    {
  30.899 -        ERROR("write: p2m_size");
  30.900 -        goto out;
  30.901 -    }
  30.902 -
  30.903 -    /*
  30.904 -     * Write an extended-info structure to inform the restore code that
  30.905 -     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
  30.906 -     * slow paths in the restore code.
  30.907 -     */
  30.908 -    if ( (pt_levels == 3) &&
  30.909 -         (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
  30.910 -    {
  30.911 -        unsigned long signature = ~0UL;
  30.912 -        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
  30.913 -        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
  30.914 -        char chunk_sig[]  = "vcpu";
  30.915 -        if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
  30.916 -             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
  30.917 -             !write_exact(io_fd, &chunk_sig, 4) ||
  30.918 -             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
  30.919 -             !write_exact(io_fd, &ctxt,      sizeof(ctxt)) )
  30.920 -        {
  30.921 -            ERROR("write: extended info");
  30.922 -            goto out;
  30.923 -        }
  30.924 -    }
  30.925 -
  30.926 -    if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
  30.927 -    {
  30.928 -        ERROR("write: p2m_frame_list");
  30.929 -        goto out;
  30.930 -    }
  30.931 -
  30.932 -    print_stats(xc_handle, dom, 0, &stats, 0);
  30.933 -
  30.934 -    /* Now write out each data page, canonicalising page tables as we go... */
  30.935 -    for ( ; ; )
  30.936 -    {
  30.937 -        unsigned int prev_pc, sent_this_iter, N, batch;
  30.938 -
  30.939 -        iter++;
  30.940 -        sent_this_iter = 0;
  30.941 -        skip_this_iter = 0;
  30.942 -        prev_pc = 0;
  30.943 -        N = 0;
  30.944 -
  30.945 -        DPRINTF("Saving memory pages: iter %d   0%%", iter);
  30.946 -
  30.947 -        while ( N < p2m_size )
  30.948 -        {
  30.949 -            unsigned int this_pc = (N * 100) / p2m_size;
  30.950 -            int rc;
  30.951 -
  30.952 -            if ( (this_pc - prev_pc) >= 5 )
  30.953 -            {
  30.954 -                DPRINTF("\b\b\b\b%3d%%", this_pc);
  30.955 -                prev_pc = this_pc;
  30.956 -            }
  30.957 -
  30.958 -            if ( !last_iter )
  30.959 -            {
  30.960 -                /* Slightly wasteful to peek the whole array evey time,
  30.961 -                   but this is fast enough for the moment. */
  30.962 -                rc = xc_shadow_control(
  30.963 -                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
  30.964 -                    p2m_size, NULL, 0, NULL);
  30.965 -                if ( rc != p2m_size )
  30.966 -                {
  30.967 -                    ERROR("Error peeking shadow bitmap");
  30.968 -                    goto out;
  30.969 -                }
  30.970 -            }
  30.971 -
  30.972 -            /* load pfn_type[] with the mfn of all the pages we're doing in
  30.973 -               this batch. */
  30.974 -            for  ( batch = 0;
  30.975 -                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
  30.976 -                   N++ )
  30.977 -            {
  30.978 -                int n = permute(N, p2m_size, order_nr);
  30.979 -
  30.980 -                if ( debug )
  30.981 -                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
  30.982 -                            iter, (unsigned long)n, live_p2m[n],
  30.983 -                            test_bit(n, to_send),
  30.984 -                            mfn_to_pfn(live_p2m[n]&0xFFFFF));
  30.985 -
  30.986 -                if ( !last_iter &&
  30.987 -                     test_bit(n, to_send) &&
  30.988 -                     test_bit(n, to_skip) )
  30.989 -                    skip_this_iter++; /* stats keeping */
  30.990 -
  30.991 -                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
  30.992 -                       (test_bit(n, to_send) && last_iter) ||
  30.993 -                       (test_bit(n, to_fix)  && last_iter)) )
  30.994 -                    continue;
  30.995 -
  30.996 -                /*
  30.997 -                ** we get here if:
  30.998 -                **  1. page is marked to_send & hasn't already been re-dirtied
  30.999 -                **  2. (ignore to_skip in last iteration)
 30.1000 -                **  3. add in pages that still need fixup (net bufs)
 30.1001 -                */
 30.1002 -
 30.1003 -                pfn_batch[batch] = n;
 30.1004 -                pfn_type[batch]  = live_p2m[n];
 30.1005 -
 30.1006 -                if ( !is_mapped(pfn_type[batch]) )
 30.1007 -                {
 30.1008 -                    /*
 30.1009 -                    ** not currently in psuedo-physical map -- set bit
 30.1010 -                    ** in to_fix since we must send this page in last_iter
 30.1011 -                    ** unless its sent sooner anyhow, or it never enters
 30.1012 -                    ** pseudo-physical map (e.g. for ballooned down domains)
 30.1013 -                    */
 30.1014 -                    set_bit(n, to_fix);
 30.1015 -                    continue;
 30.1016 -                }
 30.1017 -
 30.1018 -                if ( last_iter &&
 30.1019 -                     test_bit(n, to_fix) &&
 30.1020 -                     !test_bit(n, to_send) )
 30.1021 -                {
 30.1022 -                    needed_to_fix++;
 30.1023 -                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
 30.1024 -                            iter, n, pfn_type[batch]);
 30.1025 -                }
 30.1026 -
 30.1027 -                clear_bit(n, to_fix);
 30.1028 -
 30.1029 -                batch++;
 30.1030 -            }
 30.1031 -
 30.1032 -            if ( batch == 0 )
 30.1033 -                goto skip; /* vanishingly unlikely... */
 30.1034 -
 30.1035 -            region_base = xc_map_foreign_batch(
 30.1036 -                xc_handle, dom, PROT_READ, pfn_type, batch);
 30.1037 -            if ( region_base == NULL )
 30.1038 -            {
 30.1039 -                ERROR("map batch failed");
 30.1040 -                goto out;
 30.1041 -            }
 30.1042 -
 30.1043 -            for ( j = 0; j < batch; j++ )
 30.1044 -                ((uint32_t *)pfn_type)[j] = pfn_type[j];
 30.1045 -            if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
 30.1046 -                                       (uint32_t *)pfn_type) )
 30.1047 -            {
 30.1048 -                ERROR("get_pfn_type_batch failed");
 30.1049 -                goto out;
 30.1050 -            }
 30.1051 -            for ( j = batch-1; j >= 0; j-- )
 30.1052 -                pfn_type[j] = ((uint32_t *)pfn_type)[j];
 30.1053 -
 30.1054 -            for ( j = 0; j < batch; j++ )
 30.1055 -            {
 30.1056 -
 30.1057 -                if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
 30.1058 -                     XEN_DOMCTL_PFINFO_XTAB )
 30.1059 -                {
 30.1060 -                    DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]);
 30.1061 -                    continue;
 30.1062 -                }
 30.1063 -
 30.1064 -                if ( debug )
 30.1065 -                    DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
 30.1066 -                            " sum= %08lx\n",
 30.1067 -                            iter,
 30.1068 -                            (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 30.1069 -                            pfn_batch[j],
 30.1070 -                            pfn_type[j],
 30.1071 -                            mfn_to_pfn(pfn_type[j] &
 30.1072 -                                       ~XEN_DOMCTL_PFINFO_LTAB_MASK),
 30.1073 -                            csum_page(region_base + (PAGE_SIZE*j)));
 30.1074 -
 30.1075 -                /* canonicalise mfn->pfn */
 30.1076 -                pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
 30.1077 -                    pfn_batch[j];
 30.1078 -            }
 30.1079 -
 30.1080 -            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
 30.1081 -            {
 30.1082 -                ERROR("Error when writing to state file (2) (errno %d)",
 30.1083 -                      errno);
 30.1084 -                goto out;
 30.1085 -            }
 30.1086 -
 30.1087 -            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) )
 30.1088 -            {
 30.1089 -                ERROR("Error when writing to state file (3) (errno %d)",
 30.1090 -                      errno);
 30.1091 -                goto out;
 30.1092 -            }
 30.1093 -
 30.1094 -            /* entering this loop, pfn_type is now in pfns (Not mfns) */
 30.1095 -            for ( j = 0; j < batch; j++ )
 30.1096 -            {
 30.1097 -                unsigned long pfn, pagetype;
 30.1098 -                void *spage = (char *)region_base + (PAGE_SIZE*j);
 30.1099 -
 30.1100 -                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
 30.1101 -                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
 30.1102 -
 30.1103 -                /* write out pages in batch */
 30.1104 -                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
 30.1105 -                    continue;
 30.1106 -
 30.1107 -                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 30.1108 -
 30.1109 -                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
 30.1110 -                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
 30.1111 -                {
 30.1112 -                    /* We have a pagetable page: need to rewrite it. */
 30.1113 -                    race = 
 30.1114 -                        canonicalize_pagetable(pagetype, pfn, spage, page); 
 30.1115 -
 30.1116 -                    if ( race && !live )
 30.1117 -                    {
 30.1118 -                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
 30.1119 -                              pagetype);
 30.1120 -                        goto out;
 30.1121 -                    }
 30.1122 -
 30.1123 -                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
 30.1124 -                    {
 30.1125 -                        ERROR("Error when writing to state file (4)"
 30.1126 -                              " (errno %d)", errno);
 30.1127 -                        goto out;
 30.1128 -                    }
 30.1129 -                }
 30.1130 -                else
 30.1131 -                {
 30.1132 -                    /* We have a normal page: just write it directly. */
 30.1133 -                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
 30.1134 -                         PAGE_SIZE )
 30.1135 -                    {
 30.1136 -                        ERROR("Error when writing to state file (5)"
 30.1137 -                              " (errno %d)", errno);
 30.1138 -                        goto out;
 30.1139 -                    }
 30.1140 -                }
 30.1141 -            } /* end of the write out for this batch */
 30.1142 -
 30.1143 -            sent_this_iter += batch;
 30.1144 -
 30.1145 -            munmap(region_base, batch*PAGE_SIZE);
 30.1146 -
 30.1147 -        } /* end of this while loop for this iteration */
 30.1148 -
 30.1149 -      skip:
 30.1150 -
 30.1151 -        total_sent += sent_this_iter;
 30.1152 -
 30.1153 -        DPRINTF("\r %d: sent %d, skipped %d, ",
 30.1154 -                iter, sent_this_iter, skip_this_iter );
 30.1155 -
 30.1156 -        if ( last_iter )
 30.1157 -        {
 30.1158 -            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 30.1159 -
 30.1160 -            DPRINTF("Total pages sent= %ld (%.2fx)\n",
 30.1161 -                    total_sent, ((float)total_sent)/p2m_size );
 30.1162 -            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
 30.1163 -        }
 30.1164 -
 30.1165 -        if ( last_iter && debug )
 30.1166 -        {
 30.1167 -            int minusone = -1;
 30.1168 -            memset(to_send, 0xff, BITMAP_SIZE);
 30.1169 -            debug = 0;
 30.1170 -            DPRINTF("Entering debug resend-all mode\n");
 30.1171 -
 30.1172 -            /* send "-1" to put receiver into debug mode */
 30.1173 -            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
 30.1174 -            {
 30.1175 -                ERROR("Error when writing to state file (6) (errno %d)",
 30.1176 -                      errno);
 30.1177 -                goto out;
 30.1178 -            }
 30.1179 -
 30.1180 -            continue;
 30.1181 -        }
 30.1182 -
 30.1183 -        if ( last_iter )
 30.1184 -            break;
 30.1185 -
 30.1186 -        if ( live )
 30.1187 -        {
 30.1188 -            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
 30.1189 -                 (iter >= max_iters) ||
 30.1190 -                 (sent_this_iter+skip_this_iter < 50) ||
 30.1191 -                 (total_sent > p2m_size*max_factor) )
 30.1192 -            {
 30.1193 -                DPRINTF("Start last iteration\n");
 30.1194 -                last_iter = 1;
 30.1195 -
 30.1196 -                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
 30.1197 -                                       &ctxt) )
 30.1198 -                {
 30.1199 -                    ERROR("Domain appears not to have suspended");
 30.1200 -                    goto out;
 30.1201 -                }
 30.1202 -
 30.1203 -                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
 30.1204 -                        info.shared_info_frame,
 30.1205 -                        (unsigned long)ctxt.user_regs.eip,
 30.1206 -                        (unsigned long)ctxt.user_regs.edx);
 30.1207 -            }
 30.1208 -
 30.1209 -            if ( xc_shadow_control(xc_handle, dom, 
 30.1210 -                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
 30.1211 -                                   p2m_size, NULL, 0, &stats) != p2m_size )
 30.1212 -            {
 30.1213 -                ERROR("Error flushing shadow PT");
 30.1214 -                goto out;
 30.1215 -            }
 30.1216 -
 30.1217 -            sent_last_iter = sent_this_iter;
 30.1218 -
 30.1219 -            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
 30.1220 -
 30.1221 -        }
 30.1222 -    } /* end of infinite for loop */
 30.1223 -
 30.1224 -    DPRINTF("All memory is saved\n");
 30.1225 -
 30.1226 -    {
 30.1227 -        struct {
 30.1228 -            int minustwo;
 30.1229 -            int max_vcpu_id;
 30.1230 -            uint64_t vcpumap;
 30.1231 -        } chunk = { -2, info.max_vcpu_id };
 30.1232 -
 30.1233 -        if ( info.max_vcpu_id >= 64 )
 30.1234 -        {
 30.1235 -            ERROR("Too many VCPUS in guest!");
 30.1236 -            goto out;
 30.1237 -        }
 30.1238 -
 30.1239 -        for ( i = 1; i <= info.max_vcpu_id; i++ )
 30.1240 -        {
 30.1241 -            xc_vcpuinfo_t vinfo;
 30.1242 -            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
 30.1243 -                 vinfo.online )
 30.1244 -                vcpumap |= 1ULL << i;
 30.1245 -        }
 30.1246 -
 30.1247 -        chunk.vcpumap = vcpumap;
 30.1248 -        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
 30.1249 -        {
 30.1250 -            ERROR("Error when writing to state file (errno %d)", errno);
 30.1251 -            goto out;
 30.1252 -        }
 30.1253 -    }
 30.1254 -
 30.1255 -    /* Zero terminate */
 30.1256 -    i = 0;
 30.1257 -    if ( !write_exact(io_fd, &i, sizeof(int)) )
 30.1258 -    {
 30.1259 -        ERROR("Error when writing to state file (6') (errno %d)", errno);
 30.1260 -        goto out;
 30.1261 -    }
 30.1262 -
 30.1263 -    /* Send through a list of all the PFNs that were not in map at the close */
 30.1264 -    {
 30.1265 -        unsigned int i,j;
 30.1266 -        unsigned long pfntab[1024];
 30.1267 -
 30.1268 -        for ( i = 0, j = 0; i < p2m_size; i++ )
 30.1269 -        {
 30.1270 -            if ( !is_mapped(live_p2m[i]) )
 30.1271 -                j++;
 30.1272 -        }
 30.1273 -
 30.1274 -        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
 30.1275 -        {
 30.1276 -            ERROR("Error when writing to state file (6a) (errno %d)", errno);
 30.1277 -            goto out;
 30.1278 -        }
 30.1279 -
 30.1280 -        for ( i = 0, j = 0; i < p2m_size; )
 30.1281 -        {
 30.1282 -            if ( !is_mapped(live_p2m[i]) )
 30.1283 -                pfntab[j++] = i;
 30.1284 -
 30.1285 -            i++;
 30.1286 -            if ( (j == 1024) || (i == p2m_size) )
 30.1287 -            {
 30.1288 -                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
 30.1289 -                {
 30.1290 -                    ERROR("Error when writing to state file (6b) (errno %d)",
 30.1291 -                          errno);
 30.1292 -                    goto out;
 30.1293 -                }
 30.1294 -                j = 0;
 30.1295 -            }
 30.1296 -        }
 30.1297 -    }
 30.1298 -
 30.1299 -    /* Canonicalise the suspend-record frame number. */
 30.1300 -    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
 30.1301 -    {
 30.1302 -        ERROR("Suspend record is not in range of pseudophys map");
 30.1303 -        goto out;
 30.1304 -    }
 30.1305 -
 30.1306 -    for ( i = 0; i <= info.max_vcpu_id; i++ )
 30.1307 -    {
 30.1308 -        if ( !(vcpumap & (1ULL << i)) )
 30.1309 -            continue;
 30.1310 -
 30.1311 -        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
 30.1312 -        {
 30.1313 -            ERROR("No context for VCPU%d", i);
 30.1314 -            goto out;
 30.1315 -        }
 30.1316 -
 30.1317 -        /* Canonicalise each GDT frame number. */
 30.1318 -        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
 30.1319 -        {
 30.1320 -            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
 30.1321 -            {
 30.1322 -                ERROR("GDT frame is not in range of pseudophys map");
 30.1323 -                goto out;
 30.1324 -            }
 30.1325 -        }
 30.1326 -
 30.1327 -        /* Canonicalise the page table base pointer. */
 30.1328 -        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
 30.1329 -        {
 30.1330 -            ERROR("PT base is not in range of pseudophys map");
 30.1331 -            goto out;
 30.1332 -        }
 30.1333 -        ctxt.ctrlreg[3] = 
 30.1334 -            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 30.1335 -
 30.1336 -        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
 30.1337 -        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
 30.1338 -        {
 30.1339 -            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
 30.1340 -            {
 30.1341 -                ERROR("PT base is not in range of pseudophys map");
 30.1342 -                goto out;
 30.1343 -            }
 30.1344 -            /* Least-significant bit means 'valid PFN'. */
 30.1345 -            ctxt.ctrlreg[1] = 1 |
 30.1346 -                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
 30.1347 -        }
 30.1348 -
 30.1349 -        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
 30.1350 -        {
 30.1351 -            ERROR("Error when writing to state file (1) (errno %d)", errno);
 30.1352 -            goto out;
 30.1353 -        }
 30.1354 -    }
 30.1355 -
 30.1356 -    /*
 30.1357 -     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
 30.1358 -     */
 30.1359 -    memcpy(page, live_shinfo, PAGE_SIZE);
 30.1360 -    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
 30.1361 -    if ( !write_exact(io_fd, page, PAGE_SIZE) )
 30.1362 -    {
 30.1363 -        ERROR("Error when writing to state file (1) (errno %d)", errno);
 30.1364 -        goto out;
 30.1365 -    }
 30.1366 -
 30.1367 -    /* Success! */
 30.1368 -    rc = 0;
 30.1369 -
 30.1370 - out:
 30.1371 -
 30.1372 -    if ( live )
 30.1373 -    {
 30.1374 -        if ( xc_shadow_control(xc_handle, dom, 
 30.1375 -                               XEN_DOMCTL_SHADOW_OP_OFF,
 30.1376 -                               NULL, 0, NULL, 0, NULL) < 0 )
 30.1377 -            DPRINTF("Warning - couldn't disable shadow mode");
 30.1378 -    }
 30.1379 -
 30.1380 -    /* Flush last write and discard cache for file. */
 30.1381 -    discard_file_cache(io_fd, 1 /* flush */);
 30.1382 -
 30.1383 -    if ( live_shinfo )
 30.1384 -        munmap(live_shinfo, PAGE_SIZE);
 30.1385 -
 30.1386 -    if ( live_p2m_frame_list_list )
 30.1387 -        munmap(live_p2m_frame_list_list, PAGE_SIZE);
 30.1388 -
 30.1389 -    if ( live_p2m_frame_list )
 30.1390 -        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
 30.1391 -
 30.1392 -    if ( live_p2m )
 30.1393 -        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 30.1394 -
 30.1395 -    if ( live_m2p )
 30.1396 -        munmap(live_m2p, M2P_SIZE(max_mfn));
 30.1397 -
 30.1398 -    free(pfn_type);
 30.1399 -    free(pfn_batch);
 30.1400 -    free(to_send);
 30.1401 -    free(to_fix);
 30.1402 -    free(to_skip);
 30.1403 -
 30.1404 -    DPRINTF("Save exit rc=%d\n",rc);
 30.1405 -
 30.1406 -    return !!rc;
 30.1407 -}
 30.1408 -
 30.1409 -/*
 30.1410 - * Local variables:
 30.1411 - * mode: C
 30.1412 - * c-set-style: "BSD"
 30.1413 - * c-basic-offset: 4
 30.1414 - * tab-width: 4
 30.1415 - * indent-tabs-mode: nil
 30.1416 - * End:
 30.1417 - */
    31.1 --- a/tools/libxc/xc_resume.c	Thu Apr 12 16:37:32 2007 -0500
    31.2 +++ b/tools/libxc/xc_resume.c	Fri Apr 13 11:14:26 2007 +0100
    31.3 @@ -3,24 +3,71 @@
    31.4  #include "xg_save_restore.h"
    31.5  
    31.6  #if defined(__i386__) || defined(__x86_64__)
    31.7 +
    31.8 +#include <xen/foreign/x86_32.h>
    31.9 +#include <xen/foreign/x86_64.h>
   31.10 +#include <xen/hvm/params.h>
   31.11 +
   31.12 +/* Need to provide the right flavour of vcpu context for Xen */
   31.13 +typedef union
   31.14 +{
   31.15 +    vcpu_guest_context_x86_64_t c64;
   31.16 +    vcpu_guest_context_x86_32_t c32;   
   31.17 +    vcpu_guest_context_t c;
   31.18 +} vcpu_guest_context_either_t;
   31.19 +
   31.20  static int modify_returncode(int xc_handle, uint32_t domid)
   31.21  {
   31.22 -    vcpu_guest_context_t ctxt;
   31.23 +    vcpu_guest_context_either_t ctxt;
   31.24 +    xc_dominfo_t info;
   31.25 +    xen_capabilities_info_t caps;
   31.26      int rc;
   31.27  
   31.28 -    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt)) != 0 )
   31.29 +    if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
   31.30 +    {
   31.31 +        PERROR("Could not get domain info");
   31.32 +        return -1;
   31.33 +    }
   31.34 +
   31.35 +    /* HVM guests without PV drivers do not have a return code to modify. */
   31.36 +    if ( info.hvm )
   31.37 +    {
   31.38 +        unsigned long irq = 0;
   31.39 +        xc_get_hvm_param(xc_handle, domid, HVM_PARAM_CALLBACK_IRQ, &irq);
   31.40 +        if ( !irq )
   31.41 +            return 0;
   31.42 +    }
   31.43 +
   31.44 +    if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
   31.45 +    {
   31.46 +        PERROR("Could not get Xen capabilities");
   31.47 +        return -1;
   31.48 +    }
   31.49 +
   31.50 +    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
   31.51          return rc;
   31.52 -    ctxt.user_regs.eax = 1;
   31.53 -    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt)) != 0 )
   31.54 +
   31.55 +    if ( !info.hvm )
   31.56 +        ctxt.c.user_regs.eax = 1;
   31.57 +    else if ( strstr(caps, "x86_64") )
   31.58 +        ctxt.c64.user_regs.eax = 1;
   31.59 +    else
   31.60 +        ctxt.c32.user_regs.eax = 1;
   31.61 +
   31.62 +    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
   31.63          return rc;
   31.64  
   31.65      return 0;
   31.66  }
   31.67 +
   31.68  #else
   31.69 +
   31.70  static int modify_returncode(int xc_handle, uint32_t domid)
   31.71  {
   31.72      return 0;
   31.73 +
   31.74  }
   31.75 +
   31.76  #endif
   31.77  
   31.78  static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid)
   31.79 @@ -65,6 +112,12 @@ static int xc_domain_resume_any(int xc_h
   31.80       * (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN).
   31.81       */
   31.82  #if defined(__i386__) || defined(__x86_64__)
   31.83 +    if ( info.hvm )
   31.84 +    {
   31.85 +        ERROR("Cannot resume uncooperative HVM guests");
   31.86 +        return rc;
   31.87 +    }
   31.88 +
   31.89      /* Map the shared info frame */
   31.90      shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   31.91                                    PROT_READ, info.shared_info_frame);
    32.1 --- a/tools/libxc/xenctrl.h	Thu Apr 12 16:37:32 2007 -0500
    32.2 +++ b/tools/libxc/xenctrl.h	Fri Apr 13 11:14:26 2007 +0100
    32.3 @@ -840,6 +840,9 @@ const char *xc_error_code_to_desc(int co
    32.4   */
    32.5  xc_error_handler xc_set_error_handler(xc_error_handler handler);
    32.6  
    32.7 +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value);
    32.8 +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value);
    32.9 +
   32.10  /* PowerPC specific. */
   32.11  int xc_alloc_real_mode_area(int xc_handle,
   32.12                              uint32_t domid,
    33.1 --- a/tools/libxc/xenguest.h	Thu Apr 12 16:37:32 2007 -0500
    33.2 +++ b/tools/libxc/xenguest.h	Fri Apr 13 11:14:26 2007 +0100
    33.3 @@ -16,26 +16,19 @@
    33.4  
    33.5  
    33.6  /**
    33.7 - * This function will save a domain running Linux.
    33.8 + * This function will save a running domain.
    33.9   *
   33.10   * @parm xc_handle a handle to an open hypervisor interface
   33.11   * @parm fd the file descriptor to save a domain to
   33.12   * @parm dom the id of the domain
   33.13   * @return 0 on success, -1 on failure
   33.14   */
   33.15 -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   33.16 -                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   33.17 -                  int (*suspend)(int domid));
   33.18 +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   33.19 +                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   33.20 +                   int (*suspend)(int domid), int hvm,
   33.21 +                   void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
   33.22 +                   void (*qemu_flip_buffer)(int, int));     /* HVM only */
   33.23  
   33.24 -/**
   33.25 - * This function will save a hvm domain running unmodified guest.
   33.26 - * @return 0 on success, -1 on failure
   33.27 - */
   33.28 -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   33.29 -                uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   33.30 -                int (*suspend)(int domid),  
   33.31 -                void *(*init_qemu_maps)(int, unsigned), 
   33.32 -                void (*qemu_flip_buffer)(int, int));
   33.33  
   33.34  /**
   33.35   * This function will restore a saved domain.
   33.36 @@ -143,11 +136,6 @@ int xc_hvm_build_mem(int xc_handle,
   33.37                       const char *image_buffer,
   33.38                       unsigned long image_size);
   33.39  
   33.40 -int xc_set_hvm_param(
   33.41 -    int handle, domid_t dom, int param, unsigned long value);
   33.42 -int xc_get_hvm_param(
   33.43 -    int handle, domid_t dom, int param, unsigned long *value);
   33.44 -
   33.45  /* PowerPC specific. */
   33.46  int xc_prose_build(int xc_handle,
   33.47                     uint32_t domid,
    34.1 --- a/tools/libxc/xg_private.c	Thu Apr 12 16:37:32 2007 -0500
    34.2 +++ b/tools/libxc/xg_private.c	Fri Apr 13 11:14:26 2007 +0100
    34.3 @@ -198,29 +198,6 @@ unsigned long csum_page(void *page)
    34.4      return -1;
    34.5  }
    34.6  
    34.7 -__attribute__((weak)) 
    34.8 -    int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    34.9 -                    uint32_t max_factor, uint32_t flags,
   34.10 -                    int (*suspend)(int domid), 
   34.11 -                    void *(*init_qemu_maps)(int, unsigned), 
   34.12 -                    void (*qemu_flip_buffer)(int, int))
   34.13 -{
   34.14 -    errno = ENOSYS;
   34.15 -    return -1;
   34.16 -}
   34.17 -
   34.18 -__attribute__((weak)) int xc_get_hvm_param(
   34.19 -    int handle, domid_t dom, int param, unsigned long *value)
   34.20 -{
   34.21 -    return -ENOSYS;
   34.22 -}
   34.23 -
   34.24 -__attribute__((weak)) int xc_set_hvm_param(
   34.25 -    int handle, domid_t dom, int param, unsigned long value)
   34.26 -{
   34.27 -    return -ENOSYS;
   34.28 -}
   34.29 -
   34.30  /*
   34.31   * Local variables:
   34.32   * mode: C
    35.1 --- a/tools/libxen/include/xen_host_cpu.h	Thu Apr 12 16:37:32 2007 -0500
    35.2 +++ b/tools/libxen/include/xen_host_cpu.h	Fri Apr 13 11:14:26 2007 +0100
    35.3 @@ -70,6 +70,7 @@ typedef struct xen_host_cpu_record
    35.4      char *modelname;
    35.5      char *stepping;
    35.6      char *flags;
    35.7 +    char *features;
    35.8      double utilisation;
    35.9  } xen_host_cpu_record;
   35.10  
   35.11 @@ -223,6 +224,13 @@ xen_host_cpu_get_flags(xen_session *sess
   35.12  
   35.13  
   35.14  /**
   35.15 + * Get the features field of the given host_cpu.
   35.16 + */
   35.17 +extern bool
   35.18 +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu);
   35.19 +
   35.20 +
   35.21 +/**
   35.22   * Get the utilisation field of the given host_cpu.
   35.23   */
   35.24  extern bool
    36.1 --- a/tools/libxen/include/xen_vm.h	Thu Apr 12 16:37:32 2007 -0500
    36.2 +++ b/tools/libxen/include/xen_vm.h	Fri Apr 13 11:14:26 2007 +0100
    36.3 @@ -838,6 +838,28 @@ xen_vm_set_vcpus_number_live(xen_session
    36.4  
    36.5  
    36.6  /**
    36.7 + * Add the given key-value pair to VM.VCPUs_params, and apply that
    36.8 + * value on the running VM.
    36.9 + */
   36.10 +extern bool
   36.11 +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value);
   36.12 +
   36.13 +
   36.14 +/**
   36.15 + * Set memory_dynamic_max in database and on running VM.
   36.16 + */
   36.17 +extern bool
   36.18 +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max);
   36.19 +
   36.20 +
   36.21 +/**
   36.22 + * Set memory_dynamic_min in database and on running VM.
   36.23 + */
   36.24 +extern bool
   36.25 +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min);
   36.26 +
   36.27 +
   36.28 +/**
   36.29   * Send the given key as a sysrq to this VM.  The key is specified as a
   36.30   * single character (a String of length 1).  This can only be called when the
   36.31   * specified VM is in the Running state.
    37.1 --- a/tools/libxen/include/xen_vm_metrics.h	Thu Apr 12 16:37:32 2007 -0500
    37.2 +++ b/tools/libxen/include/xen_vm_metrics.h	Fri Apr 13 11:14:26 2007 +0100
    37.3 @@ -22,6 +22,7 @@
    37.4  #include "xen_common.h"
    37.5  #include "xen_int_float_map.h"
    37.6  #include "xen_int_int_map.h"
    37.7 +#include "xen_string_set.h"
    37.8  #include "xen_string_string_map.h"
    37.9  #include "xen_vm_metrics_decl.h"
   37.10  
   37.11 @@ -70,6 +71,7 @@ typedef struct xen_vm_metrics_record
   37.12      xen_int_float_map *vcpus_utilisation;
   37.13      xen_int_int_map *vcpus_cpu;
   37.14      xen_string_string_map *vcpus_params;
   37.15 +    struct xen_string_set *state;
   37.16      time_t start_time;
   37.17      time_t last_updated;
   37.18  } xen_vm_metrics_record;
   37.19 @@ -210,6 +212,13 @@ xen_vm_metrics_get_vcpus_params(xen_sess
   37.20  
   37.21  
   37.22  /**
   37.23 + * Get the state field of the given VM_metrics.
   37.24 + */
   37.25 +extern bool
   37.26 +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics);
   37.27 +
   37.28 +
   37.29 +/**
   37.30   * Get the start_time field of the given VM_metrics.
   37.31   */
   37.32  extern bool
    38.1 --- a/tools/libxen/src/xen_host_cpu.c	Thu Apr 12 16:37:32 2007 -0500
    38.2 +++ b/tools/libxen/src/xen_host_cpu.c	Fri Apr 13 11:14:26 2007 +0100
    38.3 @@ -61,6 +61,9 @@ static const struct_member xen_host_cpu_
    38.4          { .key = "flags",
    38.5            .type = &abstract_type_string,
    38.6            .offset = offsetof(xen_host_cpu_record, flags) },
    38.7 +        { .key = "features",
    38.8 +          .type = &abstract_type_string,
    38.9 +          .offset = offsetof(xen_host_cpu_record, features) },
   38.10          { .key = "utilisation",
   38.11            .type = &abstract_type_float,
   38.12            .offset = offsetof(xen_host_cpu_record, utilisation) }
   38.13 @@ -90,6 +93,7 @@ xen_host_cpu_record_free(xen_host_cpu_re
   38.14      free(record->modelname);
   38.15      free(record->stepping);
   38.16      free(record->flags);
   38.17 +    free(record->features);
   38.18      free(record);
   38.19  }
   38.20  
   38.21 @@ -252,6 +256,23 @@ xen_host_cpu_get_flags(xen_session *sess
   38.22  
   38.23  
   38.24  bool
   38.25 +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu)
   38.26 +{
   38.27 +    abstract_value param_values[] =
   38.28 +        {
   38.29 +            { .type = &abstract_type_string,
   38.30 +              .u.string_val = host_cpu }
   38.31 +        };
   38.32 +
   38.33 +    abstract_type result_type = abstract_type_string;
   38.34 +
   38.35 +    *result = NULL;
   38.36 +    XEN_CALL_("host_cpu.get_features");
   38.37 +    return session->ok;
   38.38 +}
   38.39 +
   38.40 +
   38.41 +bool
   38.42  xen_host_cpu_get_utilisation(xen_session *session, double *result, xen_host_cpu host_cpu)
   38.43  {
   38.44      abstract_value param_values[] =
    39.1 --- a/tools/libxen/src/xen_vm.c	Thu Apr 12 16:37:32 2007 -0500
    39.2 +++ b/tools/libxen/src/xen_vm.c	Fri Apr 13 11:14:26 2007 +0100
    39.3 @@ -1610,6 +1610,56 @@ xen_vm_set_vcpus_number_live(xen_session
    39.4  
    39.5  
    39.6  bool
    39.7 +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value)
    39.8 +{
    39.9 +    abstract_value param_values[] =
   39.10 +        {
   39.11 +            { .type = &abstract_type_string,
   39.12 +              .u.string_val = self },
   39.13 +            { .type = &abstract_type_string,
   39.14 +              .u.string_val = key },
   39.15 +            { .type = &abstract_type_string,
   39.16 +              .u.string_val = value }
   39.17 +        };
   39.18 +
   39.19 +    xen_call_(session, "VM.add_to_VCPUs_params_live", param_values, 3, NULL, NULL);
   39.20 +    return session->ok;
   39.21 +}
   39.22 +
   39.23 +
   39.24 +bool
   39.25 +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max)
   39.26 +{
   39.27 +    abstract_value param_values[] =
   39.28 +        {
   39.29 +            { .type = &abstract_type_string,
   39.30 +              .u.string_val = self },
   39.31 +            { .type = &abstract_type_int,
   39.32 +              .u.int_val = max }
   39.33 +        };
   39.34 +
   39.35 +    xen_call_(session, "VM.set_memory_dynamic_max_live", param_values, 2, NULL, NULL);
   39.36 +    return session->ok;
   39.37 +}
   39.38 +
   39.39 +
   39.40 +bool
   39.41 +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min)
   39.42 +{
   39.43 +    abstract_value param_values[] =
   39.44 +        {
   39.45 +            { .type = &abstract_type_string,
   39.46 +              .u.string_val = self },
   39.47 +            { .type = &abstract_type_int,
   39.48 +              .u.int_val = min }
   39.49 +        };
   39.50 +
   39.51 +    xen_call_(session, "VM.set_memory_dynamic_min_live", param_values, 2, NULL, NULL);
   39.52 +    return session->ok;
   39.53 +}
   39.54 +
   39.55 +
   39.56 +bool
   39.57  xen_vm_send_sysrq(xen_session *session, xen_vm vm, char *key)
   39.58  {
   39.59      abstract_value param_values[] =
    40.1 --- a/tools/libxen/src/xen_vm_metrics.c	Thu Apr 12 16:37:32 2007 -0500
    40.2 +++ b/tools/libxen/src/xen_vm_metrics.c	Fri Apr 13 11:14:26 2007 +0100
    40.3 @@ -57,6 +57,9 @@ static const struct_member xen_vm_metric
    40.4          { .key = "VCPUs_params",
    40.5            .type = &abstract_type_string_string_map,
    40.6            .offset = offsetof(xen_vm_metrics_record, vcpus_params) },
    40.7 +        { .key = "state",
    40.8 +          .type = &abstract_type_string_set,
    40.9 +          .offset = offsetof(xen_vm_metrics_record, state) },
   40.10          { .key = "start_time",
   40.11            .type = &abstract_type_datetime,
   40.12            .offset = offsetof(xen_vm_metrics_record, start_time) },
   40.13 @@ -87,6 +90,7 @@ xen_vm_metrics_record_free(xen_vm_metric
   40.14      xen_int_float_map_free(record->vcpus_utilisation);
   40.15      xen_int_int_map_free(record->vcpus_cpu);
   40.16      xen_string_string_map_free(record->vcpus_params);
   40.17 +    xen_string_set_free(record->state);
   40.18      free(record);
   40.19  }
   40.20  
   40.21 @@ -215,6 +219,23 @@ xen_vm_metrics_get_vcpus_params(xen_sess
   40.22  
   40.23  
   40.24  bool
   40.25 +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics)
   40.26 +{
   40.27 +    abstract_value param_values[] =
   40.28 +        {
   40.29 +            { .type = &abstract_type_string,
   40.30 +              .u.string_val = vm_metrics }
   40.31 +        };
   40.32 +
   40.33 +    abstract_type result_type = abstract_type_string_set;
   40.34 +
   40.35 +    *result = NULL;
   40.36 +    XEN_CALL_("VM_metrics.get_state");
   40.37 +    return session->ok;
   40.38 +}
   40.39 +
   40.40 +
   40.41 +bool
   40.42  xen_vm_metrics_get_start_time(xen_session *session, time_t *result, xen_vm_metrics vm_metrics)
   40.43  {
   40.44      abstract_value param_values[] =
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/tools/pygrub/src/LiloConf.py	Fri Apr 13 11:14:26 2007 +0100
    41.3 @@ -0,0 +1,147 @@
    41.4 +#
    41.5 +#LiloConf.py
    41.6 +#
    41.7 +
    41.8 +import sys, re, os
    41.9 +import logging
   41.10 +import GrubConf
   41.11 +
   41.12 +class LiloImage(object):
   41.13 +    def __init__(self, lines, path):
   41.14 +        self.reset(lines, path)
   41.15 +
   41.16 +    def __repr__(self):
   41.17 +        return ("title: %s\n"
   41.18 +                "  root: %s\n"
   41.19 +                "  kernel: %s\n"
   41.20 +                "  args: %s\n"
   41.21 +                "  initrd: %s\n" %(self.title, self.root, self.kernel,
   41.22 +                                   self.args, self.initrd))
   41.23 +    def reset(self, lines, path):
   41.24 +        self._root = self._initrd = self._kernel = self._args = None
   41.25 +        self.title = ""
   41.26 +        self.lines = []
   41.27 +        self.path = path
   41.28 +        map(self.set_from_line, lines)
   41.29 +        self.root = "" # dummy
   41.30 +
   41.31 +    def set_from_line(self, line, replace = None):
   41.32 +        (com, arg) = GrubConf.grub_exact_split(line, 2)
   41.33 +
   41.34 +        if self.commands.has_key(com):
   41.35 +            if self.commands[com] is not None:
   41.36 +                exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$', r"\1", arg.strip())))
   41.37 +            else:
   41.38 +                logging.info("Ignored image directive %s" %(com,))
   41.39 +        else:
   41.40 +            logging.warning("Unknown image directive %s" %(com,))
   41.41 +
   41.42 +        # now put the line in the list of lines
   41.43 +        if replace is None:
   41.44 +            self.lines.append(line)
   41.45 +        else:
   41.46 +            self.lines.pop(replace)
   41.47 +            self.lines.insert(replace, line)
   41.48 +
   41.49 +    def set_kernel(self, val):
   41.50 +        self._kernel = (None, self.path + "/" + val)
   41.51 +    def get_kernel(self):
   41.52 +        return self._kernel
   41.53 +    kernel = property(get_kernel, set_kernel)
   41.54 +
   41.55 +    def set_initrd(self, val):
   41.56 +        self._initrd = (None, self.path + "/" + val)
   41.57 +    def get_initrd(self):
   41.58 +        return self._initrd
   41.59 +    initrd = property(get_initrd, set_initrd)
   41.60 +
   41.61 +    # set up command handlers
   41.62 +    commands = { "label": "self.title",
   41.63 +                 "root": "self.root",
   41.64 +                 "rootnoverify": "self.root",
   41.65 +                 "image": "self.kernel",
   41.66 +                 "initrd": "self.initrd",
   41.67 +                 "append": "self.args",
   41.68 +                 "read-only": None,
   41.69 +                 "chainloader": None,
   41.70 +                 "module": None}
   41.71 +
   41.72 +class LiloConfigFile(object):  # global directives plus a list of LiloImage stanzas
   41.73 +    def __init__(self, fn = None):  # fn: optional config path; parsed immediately when given
   41.74 +        self.filename = fn
   41.75 +        self.images = []
   41.76 +        self.timeout = -1
   41.77 +        self._default = 0  # replaced by the "default" directive's value once parsed
   41.78 +
   41.79 +        if fn is not None:
   41.80 +            self.parse()
   41.81 +
   41.82 +    def parse(self, buf = None):  # buf: config text; when None, self.filename is read instead
   41.83 +        if buf is None:
   41.84 +            if self.filename is None:
   41.85 +                raise ValueError, "No config file defined to parse!"
   41.86 +
   41.87 +            f = open(self.filename, 'r')
   41.88 +            lines = f.readlines()
   41.89 +            f.close()
   41.90 +        else:
   41.91 +            lines = buf.split("\n")
   41.92 +
   41.93 +        path = os.path.dirname(self.filename or "")  # filename may be unset when only buf is given
   41.94 +        img = []  # lines of the image stanza currently being collected
   41.95 +        for l in lines:
   41.96 +            l = l.strip()
   41.97 +            # skip blank lines
   41.98 +            if len(l) == 0:
   41.99 +                continue
  41.100 +            # skip comments
  41.101 +            if l.startswith('#'):
  41.102 +                continue
  41.103 +            # new image
  41.104 +            if l.startswith("image"):
  41.105 +                if len(img) > 0:
  41.106 +                    self.add_image(LiloImage(img, path))
  41.107 +                img = [l]
  41.108 +                continue
  41.109 +
  41.110 +            if len(img) > 0:  # inside a stanza: every further line belongs to it
  41.111 +                img.append(l)
  41.112 +                continue
  41.113 +
  41.114 +            (com, arg) = GrubConf.grub_exact_split(l, 2)
  41.115 +            if self.commands.has_key(com):
  41.116 +                if self.commands[com] is not None:
  41.117 +                    setattr(self, self.commands[com].split(".", 1)[1], arg.strip())  # no exec on config text
  41.118 +                else:
  41.119 +                    logging.info("Ignored directive %s" %(com,))
  41.120 +            else:
  41.121 +                logging.warning("Unknown directive %s" %(com,))
  41.122 +
  41.123 +        if len(img) > 0:  # flush the last stanza
  41.124 +            self.add_image(LiloImage(img, path))
  41.125 +
  41.126 +    def add_image(self, image):
  41.127 +        self.images.append(image)
  41.128 +
  41.129 +    def _get_default(self):  # index of the image whose title equals the stored default
  41.130 +        for i in range(len(self.images)):  # every image, including the last one
  41.131 +            if self.images[i].title == self._default:
  41.132 +                return i
  41.133 +        return 0  # nothing matched: fall back to the first image
  41.134 +    def _set_default(self, val):  # keeps the raw value; it is resolved on read
  41.135 +        self._default = val
  41.136 +    default = property(_get_default, _set_default)
  41.137 +
  41.138 +    commands = { "default": "self.default",  # directive -> "self.<attr>" target; None = ignored
  41.139 +                 "timeout": "self.timeout",
  41.140 +                 "prompt": None,
  41.141 +                 "relocatable": None,
  41.142 +                 }
  41.143 +
  41.144 +if __name__ == "__main__":  # ad-hoc smoke test: parse the file named on the command line
  41.145 +    if len(sys.argv) < 2:  # argv[0] is the script itself, so a path argument means >= 2 entries
  41.146 +        raise RuntimeError, "Need a grub.conf to read"
  41.147 +    g = LiloConfigFile(sys.argv[1])
  41.148 +    for i in g.images:
  41.149 +        print i #, i.title, i.root, i.kernel, i.args, i.initrd
  41.150 +    print g.default
    42.1 --- a/tools/pygrub/src/pygrub	Thu Apr 12 16:37:32 2007 -0500
    42.2 +++ b/tools/pygrub/src/pygrub	Fri Apr 13 11:14:26 2007 +0100
    42.3 @@ -16,6 +16,7 @@
    42.4  import os, sys, string, struct, tempfile, re
    42.5  import copy
    42.6  import logging
    42.7 +import platform
    42.8  
    42.9  import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
   42.10  import getopt
   42.11 @@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p
   42.12  
   42.13  import fsimage
   42.14  import grub.GrubConf
   42.15 +import grub.LiloConf
   42.16  
   42.17  PYGRUB_VER = 0.5
   42.18  
   42.19 @@ -59,6 +61,13 @@ def get_active_partition(file):
   42.20          if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',):
   42.21              return buf[poff:poff+16]
   42.22  
   42.23 +        # type=0xee: GUID partition table
   42.24 +        # XXX assume the first partition is active
   42.25 +        if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',):
   42.26 +            os.lseek(fd, 0x400, 0)
   42.27 +            buf = os.read(fd, 512)
   42.28 +            return buf[24:40] # XXX buf[32:40]
   42.29 +
   42.30      # if there's not a partition marked as active, fall back to
   42.31      # the first partition
   42.32      return buf[446:446+16]
   42.33 @@ -346,7 +355,13 @@ class Grub:
   42.34          if not os.access(fn, os.R_OK):
   42.35              raise RuntimeError, "Unable to access %s" %(fn,)
   42.36  
   42.37 -        self.cf = grub.GrubConf.GrubConfigFile()
   42.38 +        if platform.machine() == 'ia64':
   42.39 +            self.cf = grub.LiloConf.LiloConfigFile()
   42.40 +            file_list = ("/efi/redhat/elilo.conf",)
   42.41 +        else:
   42.42 +            self.cf = grub.GrubConf.GrubConfigFile()
   42.43 +            file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
   42.44 +                         "/grub/menu.lst", "/grub/grub.conf")
   42.45  
   42.46          if not fs:
   42.47              # set the config file and parse it
   42.48 @@ -354,18 +369,15 @@ class Grub:
   42.49              self.cf.parse()
   42.50              return
   42.51  
   42.52 -        grubfile = None
   42.53 -        for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
   42.54 -                  "/grub/menu.lst", "/grub/grub.conf"):
   42.55 +        for f in file_list:
   42.56              if fs.file_exists(f):
   42.57 -                grubfile = f
   42.58 +                self.cf.filename = f
   42.59                  break
   42.60 -        if grubfile is None:
   42.61 -            raise RuntimeError, "we couldn't find grub config file in the image provided."
   42.62 -        f = fs.open_file(grubfile)
   42.63 +        if self.cf.filename is None:
   42.64 +            raise RuntimeError, "couldn't find bootloader config file in the image provided."
   42.65 +        f = fs.open_file(self.cf.filename)
   42.66          buf = f.read()
   42.67          del f
   42.68 -        # then parse the grub config
   42.69          self.cf.parse(buf)
   42.70  
   42.71      def run(self):
    43.1 --- a/tools/python/README.XendConfig	Thu Apr 12 16:37:32 2007 -0500
    43.2 +++ b/tools/python/README.XendConfig	Fri Apr 13 11:14:26 2007 +0100
    43.3 @@ -115,6 +115,7 @@ otherConfig
    43.4                                  image.nographic
    43.5                                  image.vnc
    43.6                                  image.sdl
    43.7 +                                image.monitor
    43.8                                  image.vncdisplay
    43.9                                  image.vncunused
   43.10                                  image.hvm.device_model
    44.1 --- a/tools/python/README.sxpcfg	Thu Apr 12 16:37:32 2007 -0500
    44.2 +++ b/tools/python/README.sxpcfg	Fri Apr 13 11:14:26 2007 +0100
    44.3 @@ -63,6 +63,7 @@ image
    44.4    - fdb
    44.5    - soundhw
    44.6    - localtime
    44.7 +  - monitor
    44.8    - serial
    44.9    - stdvga
   44.10    - isa
    45.1 --- a/tools/python/xen/xend/XendConfig.py	Thu Apr 12 16:37:32 2007 -0500
    45.2 +++ b/tools/python/xen/xend/XendConfig.py	Fri Apr 13 11:14:26 2007 +0100
    45.3 @@ -117,7 +117,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
    45.4  
    45.5  # Platform configuration keys.
    45.6  XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
    45.7 -                        'fda', 'fdb', 'keymap', 'isa', 'localtime',
    45.8 +                        'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 
    45.9                          'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
   45.10                          'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
   45.11                          'vncconsole', 'vncdisplay', 'vnclisten',
    46.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu Apr 12 16:37:32 2007 -0500
    46.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Apr 13 11:14:26 2007 +0100
    46.3 @@ -1601,7 +1601,6 @@ class XendDomainInfo:
    46.4              self.image = image.create(self, self.info)
    46.5              if self.image:
    46.6                  self.image.createDeviceModel(True)
    46.7 -                self.image.register_shutdown_watch()
    46.8          self._storeDomDetails()
    46.9          self._registerWatches()
   46.10          self.refreshShutdown()
    47.1 --- a/tools/python/xen/xend/XendNode.py	Thu Apr 12 16:37:32 2007 -0500
    47.2 +++ b/tools/python/xen/xend/XendNode.py	Fri Apr 13 11:14:26 2007 +0100
    47.3 @@ -603,7 +603,7 @@ class XendNode:
    47.4          return [[k, info[k]] for k in ITEM_ORDER]
    47.5  
    47.6      def xendinfo(self):
    47.7 -        return [['xend_config_format', 3]]
    47.8 +        return [['xend_config_format', 4]]
    47.9  
   47.10      #
   47.11      # utilisation tracking
    48.1 --- a/tools/python/xen/xend/image.py	Thu Apr 12 16:37:32 2007 -0500
    48.2 +++ b/tools/python/xen/xend/image.py	Fri Apr 13 11:14:26 2007 +0100
    48.3 @@ -284,17 +284,16 @@ class HVMImageHandler(ImageHandler):
    48.4          log.debug("acpi           = %d", self.acpi)
    48.5          log.debug("apic           = %d", self.apic)
    48.6  
    48.7 -        self.register_shutdown_watch()
    48.8 -        self.register_reboot_feature_watch()
    48.9 -
   48.10 -        return xc.hvm_build(domid          = self.vm.getDomid(),
   48.11 -                            image          = self.kernel,
   48.12 -                            store_evtchn   = store_evtchn,
   48.13 -                            memsize        = mem_mb,
   48.14 -                            vcpus          = self.vm.getVCpuCount(),
   48.15 -                            pae            = self.pae,
   48.16 -                            acpi           = self.acpi,
   48.17 -                            apic           = self.apic)
   48.18 +        rc = xc.hvm_build(domid          = self.vm.getDomid(),
   48.19 +                          image          = self.kernel,
   48.20 +                          store_evtchn   = store_evtchn,
   48.21 +                          memsize        = mem_mb,
   48.22 +                          vcpus          = self.vm.getVCpuCount(),
   48.23 +                          pae            = self.pae,
   48.24 +                          acpi           = self.acpi,
   48.25 +                          apic           = self.apic)
   48.26 +        rc['notes'] = { 'SUSPEND_CANCEL': 1 }
   48.27 +        return rc
   48.28  
   48.29      # Return a list of cmd line args to the device models based on the
   48.30      # xm config file
   48.31 @@ -418,6 +417,8 @@ class HVMImageHandler(ImageHandler):
   48.32          else:
   48.33              ret.append('-nographic')
   48.34  
   48.35 +        if int(vmConfig['platform'].get('monitor', 0)) != 0:
   48.36 +            ret.append('-monitor vc')
   48.37          return ret
   48.38  
   48.39      def createDeviceModel(self, restore = False):
   48.40 @@ -448,13 +449,9 @@ class HVMImageHandler(ImageHandler):
   48.41          log.info("device model pid: %d", self.pid)
   48.42  
   48.43      def recreate(self):
   48.44 -        self.register_shutdown_watch()
   48.45 -        self.register_reboot_feature_watch()
   48.46          self.pid = self.vm.gatherDom(('image/device-model-pid', int))
   48.47  
   48.48      def destroy(self, suspend = False):
   48.49 -        self.unregister_shutdown_watch()
   48.50 -        self.unregister_reboot_feature_watch();
   48.51          if self.pid:
   48.52              try:
   48.53                  sig = signal.SIGKILL
   48.54 @@ -473,74 +470,6 @@ class HVMImageHandler(ImageHandler):
   48.55                  pass
   48.56              self.pid = None
   48.57  
   48.58 -    def register_shutdown_watch(self):
   48.59 -        """ add xen store watch on control/shutdown """
   48.60 -        self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown",
   48.61 -                                     self.hvm_shutdown)
   48.62 -        log.debug("hvm shutdown watch registered")
   48.63 -
   48.64 -    def unregister_shutdown_watch(self):
   48.65 -        """Remove the watch on the control/shutdown, if any. Nothrow
   48.66 -        guarantee."""
   48.67 -
   48.68 -        try:
   48.69 -            if self.shutdownWatch:
   48.70 -                self.shutdownWatch.unwatch()
   48.71 -        except:
   48.72 -            log.exception("Unwatching hvm shutdown watch failed.")
   48.73 -        self.shutdownWatch = None
   48.74 -        log.debug("hvm shutdown watch unregistered")
   48.75 -
   48.76 -    def hvm_shutdown(self, _):
   48.77 -        """ watch call back on node control/shutdown,
   48.78 -            if node changed, this function will be called
   48.79 -        """
   48.80 -        xd = xen.xend.XendDomain.instance()
   48.81 -        try:
   48.82 -            vm = xd.domain_lookup( self.vm.getDomid() )
   48.83 -        except XendError:
   48.84 -            # domain isn't registered, no need to clean it up.
   48.85 -            return False
   48.86 -
   48.87 -        reason = vm.getShutdownReason()
   48.88 -        log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
   48.89 -        if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS:
   48.90 -            vm.info['shutdown'] = 1
   48.91 -            vm.info['shutdown_reason'] = \
   48.92 -                REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
   48.93 -            vm.refreshShutdown(vm.info)
   48.94 -
   48.95 -        return True # Keep watching
   48.96 -
   48.97 -    def register_reboot_feature_watch(self):
   48.98 -        """ add xen store watch on control/feature-reboot """
   48.99 -        self.rebootFeatureWatch = xswatch(self.vm.dompath + "/control/feature-reboot", \
  48.100 -                                         self.hvm_reboot_feature)
  48.101 -        log.debug("hvm reboot feature watch registered")
  48.102 -
  48.103 -    def unregister_reboot_feature_watch(self):
  48.104 -        """Remove the watch on the control/feature-reboot, if any. Nothrow
  48.105 -        guarantee."""
  48.106 -
  48.107 -        try:
  48.108 -            if self.rebootFeatureWatch:
  48.109 -                self.rebootFeatureWatch.unwatch()
  48.110 -        except:
  48.111 -            log.exception("Unwatching hvm reboot feature watch failed.")
  48.112 -        self.rebootFeatureWatch = None
  48.113 -        log.debug("hvm reboot feature watch unregistered")
  48.114 -
  48.115 -    def hvm_reboot_feature(self, _):
  48.116 -        """ watch call back on node control/feature-reboot,
  48.117 -            if node changed, this function will be called
  48.118 -        """
  48.119 -        status = self.vm.readDom('control/feature-reboot')
  48.120 -        log.debug("hvm_reboot_feature fired, module status=%s", status)
  48.121 -        if status == '1':
  48.122 -            self.unregister_shutdown_watch()
  48.123 -
  48.124 -        return True # Keep watching
  48.125 -
  48.126  
  48.127  class IA64_HVM_ImageHandler(HVMImageHandler):
  48.128  
    49.1 --- a/tools/python/xen/xm/create.dtd	Thu Apr 12 16:37:32 2007 -0500
    49.2 +++ b/tools/python/xen/xm/create.dtd	Fri Apr 13 11:14:26 2007 +0100
    49.3 @@ -95,7 +95,7 @@
    49.4                   src             %URI; #REQUIRED
    49.5                   type            %VDI_TYPE; #REQUIRED
    49.6                   size            CDATA #REQUIRED
    49.7 -                 shareable       CDATA #REQUIRED
    49.8 +                 sharable        CDATA #REQUIRED
    49.9                   read_only       CDATA #REQUIRED>
   49.10  
   49.11  <!ELEMENT name   (label, 
    50.1 --- a/tools/python/xen/xm/create.py	Thu Apr 12 16:37:32 2007 -0500
    50.2 +++ b/tools/python/xen/xm/create.py	Fri Apr 13 11:14:26 2007 +0100
    50.3 @@ -421,6 +421,10 @@ gopts.var('serial', val='FILE',
    50.4            fn=set_value, default='',
    50.5            use="Path to serial or pty or vc")
    50.6  
    50.7 +gopts.var('monitor', val='no|yes',
    50.8 +          fn=set_bool, default=0,
    50.9 +          use="""Should the device model use monitor?""")
   50.10 +
   50.11  gopts.var('localtime', val='no|yes',
   50.12            fn=set_bool, default=0,
   50.13            use="Is RTC set to localtime?")
    51.1 --- a/tools/python/xen/xm/main.py	Thu Apr 12 16:37:32 2007 -0500
    51.2 +++ b/tools/python/xen/xm/main.py	Fri Apr 13 11:14:26 2007 +0100
    51.3 @@ -1544,34 +1544,59 @@ def xm_info(args):
    51.4  
    51.5          host_metrics_record = server.xenapi.host_metrics.get_record(host_record["metrics"])
    51.6  
    51.7 +        def getVal(keys, default=""):
    51.8 +            data = host_record
    51.9 +            for key in keys:
   51.10 +                if key in data:
   51.11 +                    data = data[key]
   51.12 +                else:
   51.13 +                    return default
   51.14 +            return data
   51.15 +
   51.16 +        def getCpuMhz():
   51.17 +            cpu_speeds = [int(host_cpu_record["speed"])
   51.18 +                          for host_cpu_record in host_cpu_records
   51.19 +                          if "speed" in host_cpu_record]
   51.20 +            if len(cpu_speeds) > 0:
   51.21 +                return sum(cpu_speeds) / len(cpu_speeds)
   51.22 +            else:
   51.23 +                return 0
   51.24 +
   51.25 +        getCpuMhz()
   51.26 +
   51.27 +        def getCpuFeatures():
   51.28 +            if len(host_cpu_records) > 0:
   51.29 +                return host_cpu_records[0].get("features", "")
   51.30 +            else:
   51.31 +                return ""
   51.32 +                
   51.33          info = {
   51.34 -            "host":              host_record["name_label"],
   51.35 -            "release":           host_record["software_version"]["release"],
   51.36 -            "version":           host_record["software_version"]["version"],
   51.37 -            "machine":           host_record["software_version"]["machine"],
   51.38 -            "nr_cpus":           len(host_record["host_CPUs"]),
   51.39 -            "nr_nodes":          host_record["cpu_configuration"]["nr_nodes"],
   51.40 -            "sockets_per_node":  host_record["cpu_configuration"]["sockets_per_node"],
   51.41 -            "cores_per_socket":  host_record["cpu_configuration"]["cores_per_socket"],
   51.42 -            "threads_per_core":  host_record["cpu_configuration"]["threads_per_core"],
   51.43 -            "cpu_mhz":           sum([int(host_cpu_record["speed"]) for host_cpu_record in host_cpu_records])
   51.44 -                                   / len(host_cpu_records),
   51.45 -            "hw_caps":           host_cpu_records[0]["features"],
   51.46 +            "host":              getVal(["name_label"]),
   51.47 +            "release":           getVal(["software_version", "release"]),
   51.48 +            "version":           getVal(["software_version", "version"]),
   51.49 +            "machine":           getVal(["software_version", "machine"]),
   51.50 +            "nr_cpus":           len(getVal(["host_CPUs"], [])),
   51.51 +            "nr_nodes":          getVal(["cpu_configuration", "nr_nodes"]),
   51.52 +            "sockets_per_node":  getVal(["cpu_configuration", "sockets_per_node"]),
   51.53 +            "cores_per_socket":  getVal(["cpu_configuration", "cores_per_socket"]),
   51.54 +            "threads_per_core":  getVal(["cpu_configuration", "threads_per_core"]),
   51.55 +            "cpu_mhz":           getCpuMhz(),
   51.56 +            "hw_caps":           getCpuFeatures(),
   51.57              "total_memory":      int(host_metrics_record["memory_total"])/1024/1024,
   51.58              "free_memory":       int(host_metrics_record["memory_free"])/1024/1024,
   51.59 -            "xen_major":         host_record["software_version"]["xen_major"],
   51.60 -            "xen_minor":         host_record["software_version"]["xen_minor"],
   51.61 -            "xen_extra":         host_record["software_version"]["xen_extra"],
   51.62 -            "xen_caps":          " ".join(host_record["capabilities"]),
   51.63 -            "xen_scheduler":     host_record["sched_policy"],
   51.64 -            "xen_pagesize":      host_record["other_config"]["xen_pagesize"],
   51.65 -            "platform_params":   host_record["other_config"]["platform_params"],
   51.66 -            "xen_changeset":     host_record["software_version"]["xen_changeset"],
   51.67 -            "cc_compiler":       host_record["software_version"]["cc_compiler"],
   51.68 -            "cc_compile_by":     host_record["software_version"]["cc_compile_by"],
   51.69 -            "cc_compile_domain": host_record["software_version"]["cc_compile_domain"],
   51.70 -            "cc_compile_date":   host_record["software_version"]["cc_compile_date"],
   51.71 -            "xend_config_format":host_record["software_version"]["xend_config_format"]                                
   51.72 +            "xen_major":         getVal(["software_version", "xen_major"]),
   51.73 +            "xen_minor":         getVal(["software_version", "xen_minor"]),
   51.74 +            "xen_extra":         getVal(["software_version", "xen_extra"]),
   51.75 +            "xen_caps":          " ".join(getVal(["capabilities"], [])),
   51.76 +            "xen_scheduler":     getVal(["sched_policy"]),
   51.77 +            "xen_pagesize":      getVal(["other_config", "xen_pagesize"]),
   51.78 +            "platform_params":   getVal(["other_config", "platform_params"]),
   51.79 +            "xen_changeset":     getVal(["software_version", "xen_changeset"]),
   51.80 +            "cc_compiler":       getVal(["software_version", "cc_compiler"]),
   51.81 +            "cc_compile_by":     getVal(["software_version", "cc_compile_by"]),
   51.82 +            "cc_compile_domain": getVal(["software_version", "cc_compile_domain"]),
   51.83 +            "cc_compile_date":   getVal(["software_version", "cc_compile_date"]),
   51.84 +            "xend_config_format":getVal(["software_version", "xend_config_format"])                                
   51.85          }
   51.86  
   51.87          sorted = info.items()
    52.1 --- a/tools/python/xen/xm/xenapi_create.py	Thu Apr 12 16:37:32 2007 -0500
    52.2 +++ b/tools/python/xen/xm/xenapi_create.py	Fri Apr 13 11:14:26 2007 +0100
    52.3 @@ -48,7 +48,7 @@ def get_name_description(node):
    52.4  
    52.5  def get_text_in_child_node(node, child):
    52.6      tag_node = node.getElementsByTagName(child)[0]
    52.7 -    return tag_node.nodeValue
    52.8 +    return " ".join([child.nodeValue for child in tag_node.childNodes])
    52.9  
   52.10  def get_child_node_attribute(node, child, attribute):
   52.11      tag_node = node.getElementsByTagName(child)[0]
   52.12 @@ -212,8 +212,8 @@ class xenapi_create:
   52.13              "SR":               self.DEFAULT_STORAGE_REPOSITORY,  
   52.14              "virtual_size":     vdi.attributes["size"].value,
   52.15              "type":             vdi.attributes["type"].value,
   52.16 -            "shareable":        vdi.attributes["shareable"].value,
   52.17 -            "read_only":        vdi.attributes["read_only"].value,
   52.18 +            "sharable":         bool(vdi.attributes["sharable"].value),
   52.19 +            "read_only":        bool(vdi.attributes["read_only"].value),
   52.20              "other_config":     {"location":
   52.21                  vdi.attributes["src"].value}
   52.22              }
   52.23 @@ -264,7 +264,23 @@ class xenapi_create:
   52.24              "platform":
   52.25                  get_child_nodes_as_dict(vm, "platform", "key", "value"),
   52.26              "other_config":
   52.27 -                get_child_nodes_as_dict(vm, "other_config", "key", "value")
   52.28 +                get_child_nodes_as_dict(vm, "other_config", "key", "value"),
   52.29 +            "PV_bootloader":
   52.30 +                "",
   52.31 +            "PV_kernel":
   52.32 +                "",
   52.33 +            "PV_ramdisk":
   52.34 +                "",
   52.35 +            "PV_args":
   52.36 +                "",
   52.37 +            "PV_bootloader_args":
   52.38 +                "",
   52.39 +            "HVM_boot_policy":
   52.40 +                "",
   52.41 +            "HVM_boot_params":
   52.42 +                {},
   52.43 +            "PCI_bus":
   52.44 +               ""
   52.45              }
   52.46  
   52.47          if len(vm.getElementsByTagName("pv")) > 0:
   52.48 @@ -494,7 +510,7 @@ class sxp2xml:
   52.49          # Make version tag
   52.50  
   52.51          version = document.createElement("version")
   52.52 -        version.appendChild(document.createTextNode("1.0"))
   52.53 +        version.appendChild(document.createTextNode("0"))
   52.54          vm.appendChild(version)
   52.55          
   52.56          # Make pv or hvm tag
   52.57 @@ -629,10 +645,10 @@ class sxp2xml:
   52.58          vdi.attributes["src"] = src
   52.59          vdi.attributes["read_only"] \
   52.60              = (get_child_by_name(vbd_sxp, "mode") != "w") \
   52.61 -               and "true" or "false"
   52.62 +               and "True" or "False"
   52.63          vdi.attributes["size"] = '-1'
   52.64          vdi.attributes["type"] = "system"
   52.65 -        vdi.attributes["shareable"] = "false"
   52.66 +        vdi.attributes["sharable"] = "False"
   52.67          vdi.attributes["name"] = name
   52.68  
   52.69          vdi.appendChild(self.make_name_tag(name, document))
    53.1 --- a/tools/xcutils/xc_save.c	Thu Apr 12 16:37:32 2007 -0500
    53.2 +++ b/tools/xcutils/xc_save.c	Fri Apr 13 11:14:26 2007 +0100
    53.3 @@ -174,12 +174,9 @@ main(int argc, char **argv)
    53.4      max_f = atoi(argv[4]);
    53.5      flags = atoi(argv[5]);
    53.6  
    53.7 -    if (flags & XCFLAGS_HVM)
    53.8 -        ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
    53.9 -                          &suspend, &init_qemu_maps, &qemu_flip_buffer);
   53.10 -    else 
   53.11 -        ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
   53.12 -                            &suspend);
   53.13 +    ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
   53.14 +                         &suspend, !!(flags & XCFLAGS_HVM),
   53.15 +                         &init_qemu_maps, &qemu_flip_buffer);
   53.16  
   53.17      xc_interface_close(xc_fd);
   53.18  
    54.1 --- a/tools/xm-test/lib/XmTestLib/NetConfig.py	Thu Apr 12 16:37:32 2007 -0500
    54.2 +++ b/tools/xm-test/lib/XmTestLib/NetConfig.py	Fri Apr 13 11:14:26 2007 +0100
    54.3 @@ -44,7 +44,11 @@ def getXendNetConfig():
    54.4      if not xconfig:
    54.5          xconfig = "/etc/xen/xend-config.sxp"
    54.6  
    54.7 -    configfile = open(xconfig, 'r')
    54.8 +    try:
    54.9 +        configfile = open(xconfig, 'r')
   54.10 +    except:
   54.11 +        return "bridge"
   54.12 +    
   54.13      S = configfile.read()
   54.14      pin = Parser()
   54.15      pin.input(S)
    55.1 --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Thu Apr 12 16:37:32 2007 -0500
    55.2 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Fri Apr 13 11:14:26 2007 +0100
    55.3 @@ -2,8 +2,8 @@
    55.4  #define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
    55.5  
    55.6  #include <linux/version.h>
    55.7 -
    55.8  #include <linux/spinlock.h>
    55.9 +#include <asm/maddr.h>
   55.10  
   55.11  #if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
   55.12  #define __always_inline inline
   55.13 @@ -98,8 +98,6 @@ extern char *kasprintf(gfp_t gfp, const 
   55.14  
   55.15  #if defined(_PAGE_PRESENT) && !defined(_PAGE_NX)
   55.16  #define _PAGE_NX 0
   55.17 -#endif
   55.18 -
   55.19  /*
   55.20   * This variable at present is referenced by netfront, but only in code that
   55.21   * is dead when running in hvm guests. To detect potential active uses of it
   55.22 @@ -107,5 +105,6 @@ extern char *kasprintf(gfp_t gfp, const 
   55.23   * mappings created with it will fault when accessed.
   55.24   */
   55.25  #define __supported_pte_mask ((maddr_t)0)
   55.26 +#endif
   55.27  
   55.28  #endif
    56.1 --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Thu Apr 12 16:37:32 2007 -0500
    56.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Fri Apr 13 11:14:26 2007 +0100
    56.3 @@ -6,21 +6,32 @@
    56.4  #include "platform-pci.h"
    56.5  #include <asm/hypervisor.h>
    56.6  
    56.7 +struct ap_suspend_info {
    56.8 +	int      do_spin;
    56.9 +	atomic_t nr_spinning;
   56.10 +};
   56.11 +
   56.12  /*
   56.13   * Spinning prevents, for example, APs touching grant table entries while
   56.14   * the shared grant table is not mapped into the address space imemdiately
   56.15   * after resume.
   56.16   */
   56.17 -static void ap_suspend(void *_ap_spin)
   56.18 +static void ap_suspend(void *_info)
   56.19  {
   56.20 -	int *ap_spin = _ap_spin;
   56.21 +	struct ap_suspend_info *info = _info;
   56.22  
   56.23  	BUG_ON(!irqs_disabled());
   56.24  
   56.25 -	while (*ap_spin) {
   56.26 +	atomic_inc(&info->nr_spinning);
   56.27 +	mb();
   56.28 +
   56.29 +	while (info->do_spin) {
   56.30  		cpu_relax();
   56.31  		HYPERVISOR_yield();
   56.32  	}
   56.33 +
   56.34 +	mb();
   56.35 +	atomic_dec(&info->nr_spinning);
   56.36  }
   56.37  
   56.38  static int bp_suspend(void)
   56.39 @@ -42,7 +53,8 @@ static int bp_suspend(void)
   56.40  
   56.41  int __xen_suspend(int fast_suspend)
   56.42  {
   56.43 -	int err, suspend_cancelled, ap_spin;
   56.44 +	int err, suspend_cancelled, nr_cpus;
   56.45 +	struct ap_suspend_info info;
   56.46  
   56.47  	xenbus_suspend();
   56.48  
   56.49 @@ -51,22 +63,30 @@ int __xen_suspend(int fast_suspend)
   56.50  	/* Prevent any races with evtchn_interrupt() handler. */
   56.51  	disable_irq(xen_platform_pdev->irq);
   56.52  
   56.53 -	ap_spin = 1;
   56.54 +	info.do_spin = 1;
   56.55 +	atomic_set(&info.nr_spinning, 0);
   56.56  	smp_mb();
   56.57  
   56.58 -	err = smp_call_function(ap_suspend, &ap_spin, 0, 0);
   56.59 +	nr_cpus = num_online_cpus() - 1;
   56.60 +
   56.61 +	err = smp_call_function(ap_suspend, &info, 0, 0);
   56.62  	if (err < 0) {
   56.63  		preempt_enable();
   56.64  		xenbus_suspend_cancel();
   56.65  		return err;
   56.66  	}
   56.67  
   56.68 +	while (atomic_read(&info.nr_spinning) != nr_cpus)
   56.69 +		cpu_relax();
   56.70 +
   56.71  	local_irq_disable();
   56.72  	suspend_cancelled = bp_suspend();
   56.73  	local_irq_enable();
   56.74  
   56.75  	smp_mb();
   56.76 -	ap_spin = 0;
   56.77 +	info.do_spin = 0;
   56.78 +	while (atomic_read(&info.nr_spinning) != 0)
   56.79 +		cpu_relax();
   56.80  
   56.81  	enable_irq(xen_platform_pdev->irq);
   56.82  
    57.1 --- a/xen/arch/ia64/xen/hyperprivop.S	Thu Apr 12 16:37:32 2007 -0500
    57.2 +++ b/xen/arch/ia64/xen/hyperprivop.S	Fri Apr 13 11:14:26 2007 +0100
    57.3 @@ -304,6 +304,8 @@ ENTRY(hyper_ssm_i)
    57.4  	;;
    57.5  	adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
    57.6  	adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
    57.7 +	// temporarily save ar.unat
    57.8 +	mov r28=ar.unat   
    57.9  	bsw.1;;
   57.10  	// FIXME?: ar.unat is not really handled correctly,
   57.11  	// but may not matter if the OS is NaT-clean
   57.12 @@ -324,6 +326,12 @@ ENTRY(hyper_ssm_i)
   57.13  	.mem.offset 0,0; st8.spill [r2]=r30,16;
   57.14  	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
   57.15  	bsw.0 ;;
   57.16 +	mov r27=ar.unat
   57.17 +	adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
   57.18 +	//save bank1 ar.unat
   57.19 +	st8 [r26]=r27
   57.20 +	//restore ar.unat
   57.21 +	mov ar.unat=r28
   57.22  	mov r2=r30
   57.23  	mov r3=r29
   57.24  	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
   57.25 @@ -1518,8 +1526,10 @@ ENTRY(hyper_get_psr)
   57.26  	adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
   57.27  	ld8 r20=[r20];;
   57.28  	ld1 r21=[r20];;
   57.29 -	dep r8=r21,r8,IA64_PSR_I_BIT,1
   57.30 +	cmp.eq p8,p9=r0,r21
   57.31  	;;
   57.32 +(p8)	dep r8=-1,r8,IA64_PSR_I_BIT,1
   57.33 +(p9)	dep r8=0,r8,IA64_PSR_I_BIT,1
   57.34  	// set vpsr.dfh
   57.35  	adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
   57.36  	ld1 r21=[r20];;
    58.1 --- a/xen/arch/ia64/xen/mm.c	Thu Apr 12 16:37:32 2007 -0500
    58.2 +++ b/xen/arch/ia64/xen/mm.c	Fri Apr 13 11:14:26 2007 +0100
    58.3 @@ -673,7 +673,7 @@ unsigned long lookup_domain_mpa(struct d
    58.4      } else if (mpaddr - IO_PORTS_PADDR < IO_PORTS_SIZE) {
    58.5          /* Log I/O port probing, but complain less loudly about it */
    58.6          gdprintk(XENLOG_INFO, "vcpu %d iip 0x%016lx: bad I/O port access "
    58.7 -                 "0x%lx\n ", current->vcpu_id, PSCB(current, iip),
    58.8 +                 "0x%lx\n", current->vcpu_id, PSCB(current, iip),
    58.9                   IO_SPACE_SPARSE_DECODING(mpaddr - IO_PORTS_PADDR));
   58.10      } else {
   58.11          gdprintk(XENLOG_WARNING, "vcpu %d iip 0x%016lx: bad mpa 0x%lx "
    59.1 --- a/xen/arch/x86/hvm/hvm.c	Thu Apr 12 16:37:32 2007 -0500
    59.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Apr 13 11:14:26 2007 +0100
    59.3 @@ -191,6 +191,7 @@ static int hvm_save_cpu_ctxt(struct doma
    59.4  {
    59.5      struct vcpu *v;
    59.6      struct hvm_hw_cpu ctxt;
    59.7 +    struct vcpu_guest_context *vc;
    59.8  
    59.9      for_each_vcpu(d, v)
   59.10      {
   59.11 @@ -199,7 +200,40 @@ static int hvm_save_cpu_ctxt(struct doma
   59.12          if ( test_bit(_VPF_down, &v->pause_flags) ) 
   59.13              continue;
   59.14  
   59.15 +        /* Architecture-specific vmcs/vmcb bits */
   59.16          hvm_funcs.save_cpu_ctxt(v, &ctxt);
   59.17 +
   59.18 +        /* Other vcpu register state */
   59.19 +        vc = &v->arch.guest_context;
   59.20 +        if ( vc->flags & VGCF_i387_valid )
   59.21 +            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
   59.22 +        else 
   59.23 +            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
   59.24 +        ctxt.rax = vc->user_regs.eax;
   59.25 +        ctxt.rbx = vc->user_regs.ebx;
   59.26 +        ctxt.rcx = vc->user_regs.ecx;
   59.27 +        ctxt.rdx = vc->user_regs.edx;
   59.28 +        ctxt.rbp = vc->user_regs.ebp;
   59.29 +        ctxt.rsi = vc->user_regs.esi;
   59.30 +        ctxt.rdi = vc->user_regs.edi;
   59.31 +        /* %rsp handled by arch-specific call above */
   59.32 +#ifdef __x86_64__        
   59.33 +        ctxt.r8  = vc->user_regs.r8;
   59.34 +        ctxt.r9  = vc->user_regs.r9;
   59.35 +        ctxt.r10 = vc->user_regs.r10;
   59.36 +        ctxt.r11 = vc->user_regs.r11;
   59.37 +        ctxt.r12 = vc->user_regs.r12;
   59.38 +        ctxt.r13 = vc->user_regs.r13;
   59.39 +        ctxt.r14 = vc->user_regs.r14;
   59.40 +        ctxt.r15 = vc->user_regs.r15;
   59.41 +#endif
   59.42 +        ctxt.dr0 = vc->debugreg[0];
   59.43 +        ctxt.dr1 = vc->debugreg[1];
   59.44 +        ctxt.dr2 = vc->debugreg[2];
   59.45 +        ctxt.dr3 = vc->debugreg[3];
   59.46 +        ctxt.dr6 = vc->debugreg[6];
   59.47 +        ctxt.dr7 = vc->debugreg[7];
   59.48 +
   59.49          if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
   59.50              return 1; 
   59.51      }
   59.52 @@ -208,9 +242,10 @@ static int hvm_save_cpu_ctxt(struct doma
   59.53  
   59.54  static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
   59.55  {
   59.56 -    int vcpuid;
   59.57 +    int vcpuid, rc;
   59.58      struct vcpu *v;
   59.59      struct hvm_hw_cpu ctxt;
   59.60 +    struct vcpu_guest_context *vc;
   59.61  
   59.62      /* Which vcpu is this? */
   59.63      vcpuid = hvm_load_instance(h);
   59.64 @@ -219,13 +254,52 @@ static int hvm_load_cpu_ctxt(struct doma
   59.65          gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
   59.66          return -EINVAL;
   59.67      }
   59.68 +    vc = &v->arch.guest_context;
   59.69 +
   59.70 +    /* Need to init this vcpu before loading its contents. */
   59.71 +    LOCK_BIGLOCK(d);
   59.72 +    rc = v->is_initialised ? 0 : boot_vcpu(d, vcpuid, vc);
   59.73 +    UNLOCK_BIGLOCK(d); /* release before any return: no lock leak on error */
   59.74 +    if ( rc != 0 )
   59.75 +        return rc;
   59.76  
   59.77      if ( hvm_load_entry(CPU, h, &ctxt) != 0 ) 
   59.78          return -EINVAL;
   59.79  
   59.80 +    /* Architecture-specific vmcs/vmcb bits */
   59.81      if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
   59.82          return -EINVAL;
   59.83  
   59.84 +    /* Other vcpu register state */
   59.85 +    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
   59.86 +    vc->user_regs.eax = ctxt.rax;
   59.87 +    vc->user_regs.ebx = ctxt.rbx;
   59.88 +    vc->user_regs.ecx = ctxt.rcx;
   59.89 +    vc->user_regs.edx = ctxt.rdx;
   59.90 +    vc->user_regs.ebp = ctxt.rbp;
   59.91 +    vc->user_regs.esi = ctxt.rsi;
   59.92 +    vc->user_regs.edi = ctxt.rdi;
   59.93 +    vc->user_regs.esp = ctxt.rsp;
   59.94 +#ifdef __x86_64__
   59.95 +    vc->user_regs.r8  = ctxt.r8; 
   59.96 +    vc->user_regs.r9  = ctxt.r9; 
   59.97 +    vc->user_regs.r10 = ctxt.r10;
   59.98 +    vc->user_regs.r11 = ctxt.r11;
   59.99 +    vc->user_regs.r12 = ctxt.r12;
  59.100 +    vc->user_regs.r13 = ctxt.r13;
  59.101 +    vc->user_regs.r14 = ctxt.r14;
  59.102 +    vc->user_regs.r15 = ctxt.r15;
  59.103 +#endif
  59.104 +    vc->debugreg[0] = ctxt.dr0;
  59.105 +    vc->debugreg[1] = ctxt.dr1;
  59.106 +    vc->debugreg[2] = ctxt.dr2;
  59.107 +    vc->debugreg[3] = ctxt.dr3;
  59.108 +    vc->debugreg[6] = ctxt.dr6;
  59.109 +    vc->debugreg[7] = ctxt.dr7;
  59.110 +
  59.111 +    vc->flags = VGCF_i387_valid | VGCF_online;
  59.112 +    v->fpu_initialised = 1;
  59.113 +
  59.114      /* Auxiliary processors should be woken immediately. */
  59.115      if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
  59.116          vcpu_wake(v);
    60.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Apr 12 16:37:32 2007 -0500
    60.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Apr 13 11:14:26 2007 +0100
    60.3 @@ -233,7 +233,7 @@ int svm_vmcb_save(struct vcpu *v, struct
    60.4  {
    60.5      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    60.6  
    60.7 -    c->eip = vmcb->rip;
    60.8 +    c->rip = vmcb->rip;
    60.9  
   60.10  #ifdef HVM_DEBUG_SUSPEND
   60.11      printk("%s: eip=0x%"PRIx64".\n", 
   60.12 @@ -241,10 +241,11 @@ int svm_vmcb_save(struct vcpu *v, struct
   60.13             inst_len, c->eip);
   60.14  #endif
   60.15  
   60.16 -    c->esp = vmcb->rsp;
   60.17 -    c->eflags = vmcb->rflags;
   60.18 +    c->rsp = vmcb->rsp;
   60.19 +    c->rflags = vmcb->rflags;
   60.20  
   60.21      c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
   60.22 +    c->cr2 = v->arch.hvm_svm.cpu_cr2;
   60.23      c->cr3 = v->arch.hvm_svm.cpu_cr3;
   60.24      c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
   60.25  
   60.26 @@ -315,15 +316,17 @@ int svm_vmcb_restore(struct vcpu *v, str
   60.27      unsigned long mfn, old_base_mfn;
   60.28      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   60.29  
   60.30 -    vmcb->rip    = c->eip;
   60.31 -    vmcb->rsp    = c->esp;
   60.32 -    vmcb->rflags = c->eflags;
   60.33 +    vmcb->rip    = c->rip;
   60.34 +    vmcb->rsp    = c->rsp;
   60.35 +    vmcb->rflags = c->rflags;
   60.36  
   60.37      v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
   60.38      vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET;
   60.39      if ( !paging_mode_hap(v->domain) ) 
   60.40          vmcb->cr0 |= X86_CR0_PG;
   60.41  
   60.42 +    v->arch.hvm_svm.cpu_cr2 = c->cr2;
   60.43 +
   60.44  #ifdef HVM_DEBUG_SUSPEND
   60.45      printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
   60.46             __func__,
   60.47 @@ -421,6 +424,9 @@ int svm_vmcb_restore(struct vcpu *v, str
   60.48      vmcb->sysenter_esp = c->sysenter_esp;
   60.49      vmcb->sysenter_eip = c->sysenter_eip;
   60.50  
   60.51 +    vmcb->dr6 = c->dr6;
   60.52 +    vmcb->dr7 = c->dr7;
   60.53 +
   60.54      paging_update_paging_modes(v);
   60.55      return 0;
   60.56   
   60.57 @@ -440,6 +446,7 @@ void svm_save_cpu_state(struct vcpu *v, 
   60.58      data->msr_cstar        = vmcb->cstar;
   60.59      data->msr_syscall_mask = vmcb->sfmask;
   60.60      data->msr_efer         = v->arch.hvm_svm.cpu_shadow_efer;
   60.61 +    data->msr_flags        = -1ULL;
   60.62  
   60.63      data->tsc = hvm_get_guest_time(v);
   60.64  }
    61.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 12 16:37:32 2007 -0500
    61.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Apr 13 11:14:26 2007 +0100
    61.3 @@ -370,11 +370,12 @@ static inline void __restore_debug_regis
    61.4  
    61.5  int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
    61.6  {    
    61.7 -    c->eip = __vmread(GUEST_RIP);
    61.8 -    c->esp = __vmread(GUEST_RSP);
    61.9 -    c->eflags = __vmread(GUEST_RFLAGS);
   61.10 +    c->rip = __vmread(GUEST_RIP);
   61.11 +    c->rsp = __vmread(GUEST_RSP);
   61.12 +    c->rflags = __vmread(GUEST_RFLAGS);
   61.13  
   61.14      c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
   61.15 +    c->cr2 = v->arch.hvm_vmx.cpu_cr2;
   61.16      c->cr3 = v->arch.hvm_vmx.cpu_cr3;
   61.17      c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
   61.18  
   61.19 @@ -444,13 +445,15 @@ int vmx_vmcs_restore(struct vcpu *v, str
   61.20  
   61.21      vmx_vmcs_enter(v);
   61.22  
   61.23 -    __vmwrite(GUEST_RIP, c->eip);
   61.24 -    __vmwrite(GUEST_RSP, c->esp);
   61.25 -    __vmwrite(GUEST_RFLAGS, c->eflags);
   61.26 +    __vmwrite(GUEST_RIP, c->rip);
   61.27 +    __vmwrite(GUEST_RSP, c->rsp);
   61.28 +    __vmwrite(GUEST_RFLAGS, c->rflags);
   61.29  
   61.30      v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
   61.31      __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
   61.32  
   61.33 +    v->arch.hvm_vmx.cpu_cr2 = c->cr2;
   61.34 +
   61.35  #ifdef HVM_DEBUG_SUSPEND
   61.36      printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
   61.37              c->cr3,
   61.38 @@ -555,6 +558,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
   61.39      __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
   61.40      __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
   61.41  
   61.42 +    __vmwrite(GUEST_DR7, c->dr7);
   61.43 +
   61.44      vmx_vmcs_exit(v);
   61.45  
   61.46      paging_update_paging_modes(v);
   61.47 @@ -590,7 +595,7 @@ void vmx_save_cpu_state(struct vcpu *v, 
   61.48      data->shadow_gs = guest_state->shadow_gs;
   61.49  
   61.50      /* save msrs */
   61.51 -    data->flags = guest_flags;
   61.52 +    data->msr_flags        = guest_flags;
   61.53      data->msr_lstar        = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
   61.54      data->msr_star         = guest_state->msrs[VMX_INDEX_MSR_STAR];
   61.55      data->msr_cstar        = guest_state->msrs[VMX_INDEX_MSR_CSTAR];
   61.56 @@ -607,7 +612,7 @@ void vmx_load_cpu_state(struct vcpu *v, 
   61.57      struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
   61.58  
   61.59      /* restore msrs */
   61.60 -    guest_state->flags = data->flags;
   61.61 +    guest_state->flags = data->msr_flags;
   61.62      guest_state->msrs[VMX_INDEX_MSR_LSTAR]        = data->msr_lstar;
   61.63      guest_state->msrs[VMX_INDEX_MSR_STAR]         = data->msr_star;
   61.64      guest_state->msrs[VMX_INDEX_MSR_CSTAR]        = data->msr_cstar;
    62.1 --- a/xen/include/public/hvm/save.h	Thu Apr 12 16:37:32 2007 -0500
    62.2 +++ b/xen/include/public/hvm/save.h	Fri Apr 13 11:14:26 2007 +0100
    62.3 @@ -87,13 +87,40 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct 
    62.4   */
    62.5  
    62.6  struct hvm_hw_cpu {
    62.7 -    uint64_t eip;
    62.8 -    uint64_t esp;
    62.9 -    uint64_t eflags;
   62.10 +    uint8_t  fpu_regs[512];
   62.11 +
   62.12 +    uint64_t rax;
   62.13 +    uint64_t rbx;
   62.14 +    uint64_t rcx;
   62.15 +    uint64_t rdx;
   62.16 +    uint64_t rbp;
   62.17 +    uint64_t rsi;
   62.18 +    uint64_t rdi;
   62.19 +    uint64_t rsp;
   62.20 +    uint64_t r8;
   62.21 +    uint64_t r9;
   62.22 +    uint64_t r10;
   62.23 +    uint64_t r11;
   62.24 +    uint64_t r12;
   62.25 +    uint64_t r13;
   62.26 +    uint64_t r14;
   62.27 +    uint64_t r15;
   62.28 +
   62.29 +    uint64_t rip;
   62.30 +    uint64_t rflags;
   62.31 +
   62.32      uint64_t cr0;
   62.33 +    uint64_t cr2;
   62.34      uint64_t cr3;
   62.35      uint64_t cr4;
   62.36  
   62.37 +    uint64_t dr0;
   62.38 +    uint64_t dr1;
   62.39 +    uint64_t dr2;
   62.40 +    uint64_t dr3;
   62.41 +    uint64_t dr6;
   62.42 +    uint64_t dr7;    
   62.43 +
   62.44      uint32_t cs_sel;
   62.45      uint32_t ds_sel;
   62.46      uint32_t es_sel;
   62.47 @@ -142,9 +169,9 @@ struct hvm_hw_cpu {
   62.48  
   62.49      /* msr for em64t */
   62.50      uint64_t shadow_gs;
   62.51 -    uint64_t flags;
   62.52  
   62.53      /* msr content saved/restored. */
   62.54 +    uint64_t msr_flags;
   62.55      uint64_t msr_lstar;
   62.56      uint64_t msr_star;
   62.57      uint64_t msr_cstar;