ia64/xen-unstable

changeset 8598:f5ab28885c75

Merge
author djm@kirby.fc.hp.com
date Fri Jan 13 14:12:24 2006 -0600 (2006-01-13)
parents 2884f14895d2 3f702887d4a6
children fb80697dbcf9
files xen/arch/ia64/xen/xensetup.c xen/include/asm-ia64/config.h
line diff
     1.1 --- a/docs/src/user.tex	Fri Jan 13 10:38:44 2006 -0600
     1.2 +++ b/docs/src/user.tex	Fri Jan 13 14:12:24 2006 -0600
     1.3 @@ -1885,6 +1885,220 @@ mailing lists and subscription informati
     1.4  
     1.5  \appendix
     1.6  
     1.7 +\chapter{Unmodified (VMX) guest domains in Xen with Intel\textregistered Virtualization Technology (VT)}
     1.8 +
      1.9 +Xen supports guest domains running unmodified guest operating systems, using the Virtualization Technology (VT) available on recent Intel processors. More information about Intel Virtualization Technology, which implements Virtual Machine Extensions (VMX) in the processor, is available on the Intel website at \\
    1.10 + {\small {\tt http://www.intel.com/technology/computing/vptech}}
    1.11 +
    1.12 +\section{Building Xen with VT support}
    1.13 +
    1.14 +The following packages need to be installed in order to build Xen with VT support. Some Linux distributions do not provide these packages by default.
    1.15 +
    1.16 +\begin{tabular}{lp{11.0cm}}
    1.17 +{\bfseries Package} & {\bfseries Description} \\
    1.18 +
    1.19 +dev86 & The dev86 package provides an assembler and linker for real mode 80x86 instructions. You need to have this package installed in order to build the BIOS code which runs in (virtual) real mode. 
    1.20 +
     1.21 +If the dev86 package is not available for your x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\
    1.22 +
     1.23 +LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse are virtualized using the vncserver library provided by this package. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to obtain the libvncserver library. Version 0.8pre of libvncserver currently works well with Xen.\\
    1.24 +
    1.25 +SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console. 
    1.26 +
    1.27 +If the SDL and SDL-devel packages are not installed by default on the build system, they can be obtained from  {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL\&submit=Search}}
    1.28 +, {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\
    1.29 +
    1.30 +\end{tabular}
    1.31 +
    1.32 +\section{Configuration file for unmodified VMX guests}
    1.33 +
    1.34 +The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.vmx}}. There are comments describing all the options. In addition to the common options that are the same as those for paravirtualized guest configurations, VMX guest configurations have the following settings:
    1.35 +
    1.36 +\begin{tabular}{lp{11.0cm}}
    1.37 +
    1.38 +{\bfseries Parameter} & {\bfseries Description} \\
    1.39 +
    1.40 +kernel &        The VMX firmware loader, {\small {\tt /usr/lib/xen/boot/vmxloader}}\\
    1.41 +
    1.42 +builder &       The domain build function. The VMX domain uses the vmx builder.\\
    1.43 +
    1.44 +acpi & Enable VMX guest ACPI, default=0 (disabled)\\
    1.45 +
    1.46 +apic & Enable VMX guest APIC, default=0 (disabled)\\
    1.47 +
     1.48 +vif     & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, the paravirtualized network frontend is used, as with paravirtualized guests.\\
    1.49 +
     1.50 +disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the VMX guest's disk, each disk entry is of the form 
     1.51 +
     1.52 +{\small {\tt phy:UNAME,ioemu:DEV,MODE}}
     1.53 +
     1.54 +where UNAME is the host device, DEV is the device name the domain will see, and MODE is r for read-only or w for read-write. The ioemu prefix means the disk is virtualized by ioemu; if it is omitted, the disk is exported via vbd, as with paravirtualized guests.
    1.55 +
     1.56 +If using a disk image file, the entry takes the form 
    1.57 +
    1.58 +{\small {\tt file:FILEPATH,ioemu:DEV,MODE}}
    1.59 +
     1.60 +If using more than one disk, separate the disk entries with commas. For example:
    1.61 +
    1.62 +{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'file:/var/images/image2.img,ioemu:hdb,w']}}\\
    1.63 +
     1.64 +cdrom   & Disk image for the CD-ROM. The default is {\small {\tt /dev/cdrom}} for Domain0. Inside the VMX domain, the CD-ROM will be available as device {\small {\tt /dev/hdc}}. The entry can also point to an ISO file.\\
    1.65 +
    1.66 +boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM, the entry should be:
    1.67 +
    1.68 +boot='d'\\
    1.69 +
    1.70 +device\_model & The device emulation tool for VMX guests. This parameter should not be changed.\\
    1.71 +
    1.72 +sdl &   Enable SDL library for graphics, default = 0 (disabled)\\
    1.73 +
    1.74 +vnc &   Enable VNC library for graphics, default = 1 (enabled)\\
    1.75 +
     1.76 +vncviewer &     Enable spawning of the vncviewer (only valid when vnc=1), default = 1 (enabled)
     1.77 +
     1.78 +If vnc=1 and vncviewer=0, you can use vncviewer to connect to the VMX guest manually from a remote machine. For example:
    1.79 +
    1.80 +{\small {\tt vncviewer domain0\_IP\_address:VMX\_domain\_id}} \\
    1.81 +
     1.82 +ne2000 &        Emulate an ne2000 network card, default = 0 (disabled; a pcnet card is emulated instead)\\
    1.83 +
     1.84 +serial &        Enable redirection of VMX serial output to a pty device\\
    1.85 +
    1.86 +localtime &     Set the real time clock to local time [default=0, that is, set to UTC].\\
    1.87 +
    1.88 +enable-audio &  Enable audio support. This is under development.\\
    1.89 +
    1.90 +full-screen     & Start in full screen. This is under development.\\
    1.91 +
     1.92 +nographic &     Another way to redirect serial output. If enabled, neither 'sdl' nor 'vnc' works. Not recommended.\\
    1.93 +
    1.94 +\end{tabular}
    1.95 +
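          +For reference, the settings above combine into a configuration file along the following lines. This is a minimal sketch: the memory size, domain name, image path, bridge name, and device model path are illustrative, so check the {\small {\tt /etc/xen/xmexample.vmx}} shipped with your installation for the exact values.
          +
          +{\scriptsize {\tt \# illustrative values -- adjust paths and names for your system\\
          +kernel = "/usr/lib/xen/boot/vmxloader"\\
          +builder = 'vmx'\\
          +memory = 128\\
          +name = "ExampleVMXDomain"\\
          +vif = [ 'type=ioemu, bridge=xenbr0' ]\\
          +disk = [ 'file:/var/images/guest.img,ioemu:hda,w' ]\\
          +cdrom = '/dev/cdrom'\\
          +boot = 'c'\\
          +device\_model = '/usr/lib/xen/bin/qemu-dm'\\
          +sdl = 0\\
          +vnc = 1\\
          +vncviewer = 1}}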
    1.96 +
    1.97 +\section{Creating virtual disks from scratch}
    1.98 +\subsection{Using physical disks}
     1.99 +If you are using a physical disk or physical disk partition, you need to install a Linux OS on the disk first. Then the boot loader should be installed in the correct place; for example, {\small {\tt /dev/sda}} for booting from the whole disk, or {\small {\tt /dev/sda1}} for booting from partition 1.
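          +
          +For example, if GRUB is the boot loader and the whole disk {\small {\tt /dev/sda}} is dedicated to the guest (substitute your actual guest device), the boot loader can be installed into its master boot record with:
          +
          +{\small {\tt \# grub-install /dev/sda}}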
   1.100 +
   1.101 +\subsection{Using disk image files}
    1.102 +You need to create a large empty disk image file first; then you need to install a Linux OS onto it. There are two methods: install the OS directly, using a VMX guest booted from the OS installation CD-ROM, or copy an installed OS into the image file. In either case the boot loader also needs to be installed.
   1.103 +
   1.104 +\subsubsection*{To create the image file:}
   1.105 +The image size should be big enough to accommodate the entire OS. This example assumes the size is 1G (which is probably too small for most OSes).
   1.106 +
   1.107 +{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=1 seek=1023}}
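          +
          +The {\small {\tt seek}} option makes this fast: only the final 1~MB block is actually written, at an offset of 1023~MB, yielding a sparse 1024~MB file. To create a larger image, scale the seek value accordingly; for example, for a 10240~MB (10~GB) image:
          +
          +{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=1 seek=10239}}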
   1.108 +
   1.109 +\subsubsection*{To directly install Linux OS into an image file using a VMX guest:}
   1.110 +
    1.111 +Install Xen and create the VMX guest with the newly created image file, booting from the CD-ROM. The installation then proceeds like a normal Linux OS installation. The VMX configuration file should contain these two entries before the guest is created:
   1.112 +
    1.113 +{\small {\tt cdrom='/dev/cdrom'\\
    1.114 +boot='d'}}
   1.115 +
    1.116 +Once the installation completes, change the {\small {\tt boot}} entry back to {\small {\tt 'c'}} so the guest boots from its hard disk image. If this method does not succeed, you can choose the following method of copying an installed Linux OS into an image file.
   1.117 +
    1.118 +\subsubsection*{To copy an installed OS into an image file:}
    1.119 +Direct installation is the easier way to partition the image and install an OS into it. But if you want to put a specific, already-installed OS into your disk image, you will most likely want to use this method.
   1.120 +
   1.121 +\begin{enumerate}
   1.122 +\item {\bfseries Install a normal Linux OS on the host machine}\\
   1.123 +You can choose any way to install Linux, such as using yum to install Red Hat Linux or YAST to install Novell SuSE Linux. The rest of this example assumes the Linux OS is installed in {\small {\tt /var/guestos/}}.
   1.124 +
   1.125 +\item {\bfseries Make the partition table}\\
    1.126 +The image file will be treated as a hard disk, so you should make a partition table in the image file. For example:
   1.127 +
   1.128 +{\scriptsize {\tt \# losetup /dev/loop0 hd.img\\
   1.129 +\# fdisk -b 512 -C 4096 -H 16 -S 32 /dev/loop0\\
    1.130 +press 'n' to add a new partition\\
    1.131 +press 'p' to choose a primary partition\\
    1.132 +press '1' to set the partition number\\
    1.133 +press Enter to accept the default value of the "First Cylinder" parameter\\
    1.134 +press Enter to accept the default value of the "Last Cylinder" parameter\\
    1.135 +press 'w' to write the partition table and exit\\
   1.136 +\# losetup -d /dev/loop0}}
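          +
          +As a consistency check, the geometry passed to fdisk matches the image created earlier: 4096 cylinders $\times$ 16 heads $\times$ 32 sectors $\times$ 512 bytes per sector = 1024~MB.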
   1.137 +
   1.138 +\item {\bfseries Make the file system and install grub}\\
   1.139 +{\scriptsize {\tt \# ln -s /dev/loop0 /dev/loop\\
   1.140 +\# losetup /dev/loop0 hd.img\\
   1.141 +\# losetup -o 16384 /dev/loop1 hd.img\\
   1.142 +\# mkfs.ext3 /dev/loop1\\
   1.143 +\# mount /dev/loop1 /mnt\\
   1.144 +\# mkdir -p /mnt/boot/grub\\
   1.145 +\# cp /boot/grub/stage* /boot/grub/e2fs\_stage1\_5 /mnt/boot/grub\\
   1.146 +\# umount /mnt\\
   1.147 +\# grub\\
   1.148 +grub> device (hd0) /dev/loop\\
   1.149 +grub> root (hd0,0)\\
   1.150 +grub> setup (hd0)\\
   1.151 +grub> quit\\
   1.152 +\# rm /dev/loop\\
   1.153 +\# losetup -d /dev/loop0\\
   1.154 +\# losetup -d /dev/loop1}}
   1.155 +
    1.156 +The {\small {\tt losetup}} option {\small {\tt -o 16384}} skips the partition table in the image file: the offset is the partition's starting sector multiplied by the sector size, and with the geometry used above the first partition starts at sector 32, so 32 $\times$ 512 = 16384. We need {\small {\tt /dev/loop}} because grub expects a disk device \emph{name}, where \emph{name} represents the entire disk and \emph{name1} represents the first partition.
   1.157 +
   1.158 +\item {\bfseries Copy the OS files to the image}\\ 
    1.159 +If you have Xen installed, you can easily use {\small {\tt lomount}} instead of {\small {\tt losetup}} and {\small {\tt mount}} when copying files to partitions. {\small {\tt lomount}} just needs the partition information.
   1.160 +
   1.161 +{\scriptsize {\tt \# lomount -t ext3 -diskimage hd.img -partition 1 /mnt/guest\\
   1.162 +\# cp -ax /var/guestos/\{root,dev,var,etc,usr,bin,sbin,lib\} /mnt/guest\\
   1.163 +\# mkdir /mnt/guest/\{proc,sys,home,tmp\}}}
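          +
          +If {\small {\tt lomount}} is not available, an equivalent mount of the first partition can be done with the offset technique used above:
          +
          +{\scriptsize {\tt \# mkdir -p /mnt/guest\\
          +\# losetup -o 16384 /dev/loop1 hd.img\\
          +\# mount /dev/loop1 /mnt/guest}}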
   1.164 +
   1.165 +\item {\bfseries Edit the {\small {\tt /etc/fstab}} of the guest image}\\
   1.166 +The fstab should look like this:
   1.167 +
   1.168 +{\scriptsize {\tt \# vim /mnt/guest/etc/fstab\\
   1.169 +/dev/hda1       /               ext3            defaults 1 1\\
   1.170 +none            /dev/pts        devpts  gid=5,mode=620 0 0\\
   1.171 +none            /dev/shm        tmpfs           defaults 0 0\\
   1.172 +none            /proc           proc            defaults 0 0\\
    1.173 +none            /sys            sysfs           defaults 0 0}}
   1.174 +
    1.175 +\item {\bfseries Unmount the image file}\\
   1.176 +{\small {\tt \# umount /mnt/guest}}
   1.177 +\end{enumerate}
   1.178 +
    1.179 +Now the guest OS image {\small {\tt hd.img}} is ready. You can also download quickstart images from {\small {\tt http://free.oszoo.org}}, but make sure the boot loader is installed.
   1.180 +
   1.181 +\subsection{Install Windows into an Image File using a VMX guest}
   1.182 +In order to install a Windows OS, you should keep {\small {\tt acpi=0}} in your VMX configuration file.
   1.183 +
   1.184 +\section{VMX Guests}
   1.185 +\subsection{Editing the Xen VMX config file}
    1.186 +Make a copy of the example VMX configuration file {\small {\tt /etc/xen/xmexample.vmx}} and edit the line that reads
   1.187 +
   1.188 +{\small {\tt disk = [ 'file:/var/images/\emph{guest.img},ioemu:hda,w' ]}}
   1.189 +
   1.190 +replacing \emph{guest.img} with the name of the guest OS image file you just made.
   1.191 +
   1.192 +\subsection{Creating VMX guests}
    1.193 +Simply follow the usual method of creating the guest, providing the filename of your VMX configuration file:\\
   1.194 +
   1.195 +{\small {\tt \# xend start\\
   1.196 +\# xm create /etc/xen/vmxguest.vmx}}
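          +
          +After creation, the new VMX domain should appear, together with its domain id, in the output of:
          +
          +{\small {\tt \# xm list}}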
   1.197 +
   1.198 +In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}.
   1.199 + 
    1.200 +\subsection{Destroying VMX guests}
    1.201 +VMX guests can be destroyed in the same way as paravirtualized guests. We recommend that you type the command 
   1.202 +
   1.203 +{\small {\tt poweroff}} 
   1.204 +
   1.205 +in the VMX guest's console first to prevent data loss. Then execute the command 
   1.206 +
   1.207 +{\small {\tt xm destroy \emph{vmx\_guest\_id} }} 
   1.208 +
   1.209 +at the Domain0 console.
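          +
          +Here \emph{vmx\_guest\_id} is the domain id shown by {\small {\tt xm list}}; the domain name can be used instead.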
   1.210 +
    1.211 +\subsection{VMX window (X or VNC) Hot Keys}
    1.212 +If you are running in the X environment, an X window is created when you create a VMX guest. Several hot keys are available in that window for controlling the VMX guest.
   1.213 + 
    1.214 +{\bfseries Ctrl+Alt+2} switches from the guest VGA window to the control window. Typing {\small {\tt help}} shows the control commands. For example, 'q' is the command to destroy the VMX guest.\\
    1.215 +{\bfseries Ctrl+Alt+1} switches back to the VMX guest's VGA.\\
    1.216 +{\bfseries Ctrl+Alt+3} switches to the serial port output. It captures serial output from the VMX guest, and works only if the VMX guest was configured to use the serial port. \\
   1.217 +
   1.218 +\subsection{Save/Restore and Migration}
    1.219 +VMX guests currently cannot be saved, restored, or migrated. These features are under active development.
   1.220 +
   1.221  %% Chapter Glossary of Terms moved to glossary.tex
   1.222  \chapter{Glossary of Terms}
   1.223  
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/Makefile	Fri Jan 13 10:38:44 2006 -0600
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/Makefile	Fri Jan 13 14:12:24 2006 -0600
     2.3 @@ -77,8 +77,6 @@ install kernel_install:
     2.4  	install -m0664 .config $(INSTALL_PATH)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
     2.5  	install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
     2.6  	ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
     2.7 -	mkdir -p $(INSTALL_PATH)/usr/include/xen/linux
     2.8 -	install -m0644 $(srctree)/include/asm-xen/linux-public/*.h $(INSTALL_PATH)/usr/include/xen/linux
     2.9  
    2.10  archclean:
    2.11  	@if [ -e arch/xen/arch ]; then $(MAKE) $(clean)=arch/xen/arch; fi;
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Fri Jan 13 10:38:44 2006 -0600
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Fri Jan 13 14:12:24 2006 -0600
     3.3 @@ -40,8 +40,6 @@
     3.4  #include <asm/fixmap.h>
     3.5  #endif
     3.6  
     3.7 -void (*pm_power_off)(void) = NULL;
     3.8 -
     3.9  #ifdef	CONFIG_X86_64
    3.10  
    3.11  static inline void  acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
     4.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c	Fri Jan 13 10:38:44 2006 -0600
     4.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c	Fri Jan 13 14:12:24 2006 -0600
     4.3 @@ -67,8 +67,11 @@ int mtrr_add_page(unsigned long base, un
     4.4  	op.u.add_memtype.pfn     = base;
     4.5  	op.u.add_memtype.nr_pfns = size;
     4.6  	op.u.add_memtype.type    = type;
     4.7 -	if ((error = HYPERVISOR_dom0_op(&op)))
     4.8 +	error = HYPERVISOR_dom0_op(&op);
     4.9 +	if (error) {
    4.10 +		BUG_ON(error > 0);
    4.11  		return error;
    4.12 +	}
    4.13  
    4.14  	if (increment)
    4.15  		++usage_table[op.u.add_memtype.reg];
    4.16 @@ -121,8 +124,12 @@ int mtrr_del_page(int reg, unsigned long
    4.17  	if (--usage_table[reg] < 1) {
    4.18  		op.cmd = DOM0_DEL_MEMTYPE;
    4.19  		op.u.del_memtype.handle = 0;
    4.20 -		op.u.add_memtype.reg    = reg;
    4.21 -		(void)HYPERVISOR_dom0_op(&op);
    4.22 +		op.u.del_memtype.reg    = reg;
    4.23 +		error = HYPERVISOR_dom0_op(&op);
    4.24 +		if (error) {
    4.25 +			BUG_ON(error > 0);
    4.26 +			goto out;
    4.27 +		}
    4.28  	}
    4.29  	error = reg;
    4.30   out:
     5.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Fri Jan 13 10:38:44 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Fri Jan 13 14:12:24 2006 -0600
     5.3 @@ -76,7 +76,9 @@ IF_MASK		= 0x00000200
     5.4  DF_MASK		= 0x00000400 
     5.5  NT_MASK		= 0x00004000
     5.6  VM_MASK		= 0x00020000
     5.7 -
     5.8 +/* Pseudo-eflags. */
     5.9 +NMI_MASK	= 0x80000000
    5.10 +	
    5.11  /* Offsets into shared_info_t. */
    5.12  #define evtchn_upcall_pending		/* 0 */
    5.13  #define evtchn_upcall_mask		1
    5.14 @@ -305,8 +307,8 @@ restore_all:
    5.15  	je ldt_ss			# returning to user-space with LDT SS
    5.16  #endif /* XEN */
    5.17  restore_nocheck:
    5.18 -	testl $VM_MASK, EFLAGS(%esp)
    5.19 -	jnz resume_vm86
    5.20 +	testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
    5.21 +	jnz hypervisor_iret
    5.22  	movb EVENT_MASK(%esp), %al
    5.23  	notb %al			# %al == ~saved_mask
    5.24  	XEN_GET_VCPU_INFO(%esi)
    5.25 @@ -328,11 +330,11 @@ iret_exc:
    5.26  	.long 1b,iret_exc
    5.27  .previous
    5.28  
    5.29 -resume_vm86:
    5.30 -	XEN_UNBLOCK_EVENTS(%esi)
    5.31 +hypervisor_iret:
    5.32 +	andl $~NMI_MASK, EFLAGS(%esp)
    5.33  	RESTORE_REGS
    5.34  	movl %eax,(%esp)
    5.35 -	movl $__HYPERVISOR_switch_vm86,%eax
    5.36 +	movl $__HYPERVISOR_iret,%eax
    5.37  	int $0x82
    5.38  	ud2
    5.39  
    5.40 @@ -691,6 +693,15 @@ debug_stack_correct:
    5.41  	call do_debug
    5.42  	jmp ret_from_exception
    5.43  
    5.44 +ENTRY(nmi)
    5.45 +	pushl %eax
    5.46 +	SAVE_ALL
    5.47 +	xorl %edx,%edx		# zero error code
    5.48 +	movl %esp,%eax		# pt_regs pointer
    5.49 +	call do_nmi
    5.50 +	orl  $NMI_MASK, EFLAGS(%esp)
    5.51 +	jmp restore_all
    5.52 +
    5.53  #if 0 /* XEN */
    5.54  /*
    5.55   * NMI is doubly nasty. It can happen _while_ we're handling
     6.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Fri Jan 13 10:38:44 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Fri Jan 13 14:12:24 2006 -0600
     6.3 @@ -76,9 +76,7 @@ EXPORT_SYMBOL(ioremap_nocache);
     6.4  EXPORT_SYMBOL(iounmap);
     6.5  EXPORT_SYMBOL(kernel_thread);
     6.6  EXPORT_SYMBOL(pm_idle);
     6.7 -#ifdef CONFIG_ACPI_BOOT
     6.8  EXPORT_SYMBOL(pm_power_off);
     6.9 -#endif
    6.10  EXPORT_SYMBOL(get_cmos_time);
    6.11  EXPORT_SYMBOL(cpu_khz);
    6.12  EXPORT_SYMBOL(apm_info);
     7.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c	Fri Jan 13 10:38:44 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c	Fri Jan 13 14:12:24 2006 -0600
     7.3 @@ -622,9 +622,11 @@ static int balanced_irq(void *unused)
     7.4  		try_to_freeze(PF_FREEZE);
     7.5  		if (time_after(jiffies,
     7.6  				prev_balance_time+balanced_irq_interval)) {
     7.7 +			preempt_disable();
     7.8  			do_irq_balance();
     7.9  			prev_balance_time = jiffies;
    7.10  			time_remaining = balanced_irq_interval;
    7.11 +			preempt_enable();
    7.12  		}
    7.13  	}
    7.14  	return 0;
     8.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Fri Jan 13 10:38:44 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Fri Jan 13 14:12:24 2006 -0600
     8.3 @@ -506,18 +506,11 @@ static void mem_parity_error(unsigned ch
     8.4  
     8.5  static void io_check_error(unsigned char reason, struct pt_regs * regs)
     8.6  {
     8.7 -	unsigned long i;
     8.8 -
     8.9  	printk("NMI: IOCK error (debug interrupt?)\n");
    8.10  	show_registers(regs);
    8.11  
    8.12  	/* Re-enable the IOCK line, wait for a few seconds */
    8.13 -	reason = (reason & 0xf) | 8;
    8.14 -	outb(reason, 0x61);
    8.15 -	i = 2000;
    8.16 -	while (--i) udelay(1000);
    8.17 -	reason &= ~8;
    8.18 -	outb(reason, 0x61);
    8.19 +	clear_io_check_error(reason);
    8.20  }
    8.21  
    8.22  static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
    8.23 @@ -648,12 +641,6 @@ fastcall void do_int3(struct pt_regs *re
    8.24  }
    8.25  #endif
    8.26  
    8.27 -static inline void conditional_sti(struct pt_regs *regs)
    8.28 -{
    8.29 -	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
    8.30 -		local_irq_enable();
    8.31 -}
    8.32 -
    8.33  /*
    8.34   * Our handling of the processor debug registers is non-trivial.
    8.35   * We do not clear them on entry and exit from the kernel. Therefore
    8.36 @@ -686,9 +673,9 @@ fastcall void do_debug(struct pt_regs * 
    8.37  	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
    8.38  					SIGTRAP) == NOTIFY_STOP)
    8.39  		return;
    8.40 -
    8.41  	/* It's safe to allow irq's after DR6 has been saved */
    8.42 -	conditional_sti(regs);
    8.43 +	if (regs->eflags & X86_EFLAGS_IF)
    8.44 +		local_irq_enable();
    8.45  
    8.46  	/* Mask out spurious debug traps due to lazy DR7 setting */
    8.47  	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
     9.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Fri Jan 13 10:38:44 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Fri Jan 13 14:12:24 2006 -0600
     9.3 @@ -65,7 +65,7 @@ static pmd_t * __init one_md_table_init(
     9.4  {
     9.5  	pud_t *pud;
     9.6  	pmd_t *pmd_table;
     9.7 -
     9.8 +		
     9.9  #ifdef CONFIG_X86_PAE
    9.10  	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    9.11  	make_lowmem_page_readonly(pmd_table);
    10.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Fri Jan 13 10:38:44 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Fri Jan 13 14:12:24 2006 -0600
    10.3 @@ -17,6 +17,13 @@
    10.4  #include <linux/kthread.h>
    10.5  #include <asm-xen/xencons.h>
    10.6  
    10.7 +#if defined(__i386__) || defined(__x86_64__)
    10.8 +/*
    10.9 + * Power off function, if any
   10.10 + */
   10.11 +void (*pm_power_off)(void);
   10.12 +#endif
   10.13 +
   10.14  #define SHUTDOWN_INVALID  -1
   10.15  #define SHUTDOWN_POWEROFF  0
   10.16  #define SHUTDOWN_REBOOT    1
   10.17 @@ -283,15 +290,15 @@ static void shutdown_handler(struct xenb
   10.18  			     const char **vec, unsigned int len)
   10.19  {
   10.20  	char *str;
   10.21 -	struct xenbus_transaction *xbt;
   10.22 +	xenbus_transaction_t xbt;
   10.23  	int err;
   10.24  
   10.25  	if (shutting_down != SHUTDOWN_INVALID)
   10.26  		return;
   10.27  
   10.28   again:
   10.29 -	xbt = xenbus_transaction_start();
   10.30 -	if (IS_ERR(xbt))
   10.31 +	err = xenbus_transaction_start(&xbt);
   10.32 +	if (err)
   10.33  		return;
   10.34  	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
   10.35  	/* Ignore read errors and empty reads. */
   10.36 @@ -332,12 +339,12 @@ static void sysrq_handler(struct xenbus_
   10.37  			  unsigned int len)
   10.38  {
   10.39  	char sysrq_key = '\0';
   10.40 -	struct xenbus_transaction *xbt;
   10.41 +	xenbus_transaction_t xbt;
   10.42  	int err;
   10.43  
   10.44   again:
   10.45 -	xbt  = xenbus_transaction_start();
   10.46 -	if (IS_ERR(xbt))
   10.47 +	err = xenbus_transaction_start(&xbt);
   10.48 +	if (err)
   10.49  		return;
   10.50  	if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
   10.51  		printk(KERN_ERR "Unable to read sysrq code in "
    11.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c	Fri Jan 13 10:38:44 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c	Fri Jan 13 14:12:24 2006 -0600
    11.3 @@ -298,7 +298,7 @@ static void vcpu_hotplug(unsigned int cp
    11.4  		return;
    11.5  
    11.6  	sprintf(dir, "cpu/%d", cpu);
    11.7 -	err = xenbus_scanf(NULL, dir, "availability", "%s", state);
    11.8 +	err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
    11.9  	if (err != 1) {
   11.10  		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
   11.11  		return;
    12.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c	Fri Jan 13 10:38:44 2006 -0600
    12.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c	Fri Jan 13 14:12:24 2006 -0600
    12.3 @@ -526,7 +526,7 @@ extern union xen_start_info_union xen_st
    12.4  
    12.5  unsigned long __init e820_end_of_ram(void)
    12.6  {
    12.7 -        unsigned long max_end_pfn;
    12.8 +	unsigned long max_end_pfn;
    12.9  
   12.10  	if (xen_override_max_pfn == 0) {
   12.11  		max_end_pfn = xen_start_info->nr_pages;
   12.12 @@ -612,7 +612,7 @@ void __init parse_memopt(char *p, char *
   12.13  { 
   12.14  	end_user_pfn = memparse(p, from);
   12.15  	end_user_pfn >>= PAGE_SHIFT;	
   12.16 -        xen_override_max_pfn = (unsigned long) end_user_pfn;
   12.17 +	xen_override_max_pfn = (unsigned long) end_user_pfn;
   12.18  } 
   12.19  
   12.20  /*
    13.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Fri Jan 13 10:38:44 2006 -0600
    13.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Fri Jan 13 14:12:24 2006 -0600
    13.3 @@ -57,7 +57,7 @@
    13.4  #ifndef CONFIG_PREEMPT
    13.5  #define retint_kernel retint_restore_args
    13.6  #endif	
    13.7 -
    13.8 +	
    13.9  /*
   13.10   * C code is not supposed to know about undefined top of stack. Every time 
   13.11   * a C function with an pt_regs argument is called from the SYSCALL based 
   13.12 @@ -65,7 +65,7 @@
   13.13   * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
   13.14   * manipulation.
   13.15   */        	
   13.16 -
   13.17 +		
   13.18  	/* %rsp:at FRAMEEND */ 
   13.19  	.macro FIXUP_TOP_OF_STACK tmp
   13.20  	movq    $__USER_CS,CS(%rsp)
   13.21 @@ -121,19 +121,19 @@
   13.22  	.endm
   13.23  
   13.24          /*
   13.25 -         * Must be consistent with the definition in arch_x86_64.h:    
   13.26 -         *     struct switch_to_user {
   13.27 +         * Must be consistent with the definition in arch-x86_64.h:    
   13.28 +         *     struct iret_context {
   13.29           *        u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
   13.30           *     };
   13.31           * #define VGCF_IN_SYSCALL (1<<8) 
   13.32           */
   13.33 -        .macro SWITCH_TO_USER flag
   13.34 +        .macro HYPERVISOR_IRET flag
   13.35          subq $8*4,%rsp                   # reuse rip, cs, rflags, rsp, ss in the stack
   13.36          movq %rax,(%rsp)
   13.37          movq %r11,1*8(%rsp)
   13.38          movq %rcx,2*8(%rsp)              # we saved %rcx upon exceptions
   13.39          movq $\flag,3*8(%rsp)
   13.40 -        movq $__HYPERVISOR_switch_to_user,%rax
   13.41 +        movq $__HYPERVISOR_iret,%rax
   13.42          syscall
   13.43          .endm
   13.44  
   13.45 @@ -225,7 +225,7 @@ sysret_check:
   13.46  	jnz  sysret_careful 
   13.47          XEN_UNBLOCK_EVENTS(%rsi)                
   13.48  	RESTORE_ARGS 0,8,0
   13.49 -        SWITCH_TO_USER VGCF_IN_SYSCALL
   13.50 +        HYPERVISOR_IRET VGCF_IN_SYSCALL
   13.51  
   13.52  	/* Handle reschedules */
   13.53  	/* edx:	work, edi: workmask */	
   13.54 @@ -418,7 +418,6 @@ ENTRY(stub_rt_sigreturn)
   13.55  	jmp int_ret_from_sys_call
   13.56  	CFI_ENDPROC
   13.57  
   13.58 -
   13.59  /* 
   13.60   * Interrupt entry/exit.
   13.61   *
   13.62 @@ -479,7 +478,7 @@ kernel_mode:
   13.63          orb   $3,1*8(%rsp)
   13.64  	iretq
   13.65  user_mode:
   13.66 -	SWITCH_TO_USER 0                        
   13.67 +	HYPERVISOR_IRET 0
   13.68  	
   13.69  	/* edi: workmask, edx: work */	
   13.70  retint_careful:
   13.71 @@ -720,6 +719,18 @@ 11:	movb $0, EVENT_MASK(%rsp)
   13.72  	call evtchn_do_upcall
   13.73          jmp  error_exit
   13.74  
   13.75 +#ifdef CONFIG_X86_LOCAL_APIC
   13.76 +ENTRY(nmi)
   13.77 +	zeroentry do_nmi_callback
   13.78 +ENTRY(do_nmi_callback)
   13.79 +        addq $8, %rsp
   13.80 +        call do_nmi
   13.81 +        RESTORE_REST
   13.82 +        XEN_BLOCK_EVENTS(%rsi)
   13.83 +        GET_THREAD_INFO(%rcx)
   13.84 +        jmp  retint_restore_args
   13.85 +#endif
   13.86 +
   13.87          ALIGN
   13.88  restore_all_enable_events:  
   13.89  	XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
   13.90 @@ -734,7 +745,7 @@ scrit:	/**** START OF CRITICAL REGION **
   13.91          orb   $3,1*8(%rsp)
   13.92          iretq
   13.93  crit_user_mode:
   13.94 -        SWITCH_TO_USER 0
   13.95 +        HYPERVISOR_IRET 0
   13.96          
   13.97  14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
   13.98  	XEN_PUT_VCPU_INFO(%rsi)
    14.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c	Fri Jan 13 10:38:44 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c	Fri Jan 13 14:12:24 2006 -0600
    14.3 @@ -68,13 +68,13 @@ static void __init copy_bootdata(char *r
    14.4  
    14.5  static void __init setup_boot_cpu_data(void)
    14.6  {
    14.7 -	int dummy, eax;
    14.8 +	unsigned int dummy, eax;
    14.9  
   14.10  	/* get vendor info */
   14.11 -	cpuid(0, &boot_cpu_data.cpuid_level,
   14.12 -	      (int *)&boot_cpu_data.x86_vendor_id[0],
   14.13 -	      (int *)&boot_cpu_data.x86_vendor_id[8],
   14.14 -	      (int *)&boot_cpu_data.x86_vendor_id[4]);
   14.15 +	cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
   14.16 +	      (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
   14.17 +	      (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
   14.18 +	      (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
   14.19  
   14.20  	/* get cpu type */
   14.21  	cpuid(1, &eax, &dummy, &dummy,
   14.22 @@ -109,7 +109,6 @@ void __init x86_64_start_kernel(char * r
   14.23  	if (s != NULL)
   14.24  		setup_early_printk(s);
   14.25  #endif
   14.26 -
   14.27  #ifdef CONFIG_DISCONTIGMEM
   14.28  	s = strstr(saved_command_line, "numa=");
   14.29  	if (s != NULL)
    15.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c	Fri Jan 13 10:38:44 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c	Fri Jan 13 14:12:24 2006 -0600
    15.3 @@ -255,11 +255,9 @@ static int __init enable_ioapic_setup(ch
    15.4  	return 1;
    15.5  }
    15.6  
    15.7 -
    15.8  __setup("noapic", disable_ioapic_setup);
    15.9  __setup("apic", enable_ioapic_setup);
   15.10  
   15.11 -
   15.12  #include <asm/pci-direct.h>
   15.13  #include <linux/pci_ids.h>
   15.14  #include <linux/pci.h>
   15.15 @@ -1146,6 +1144,7 @@ void __apicdebuginit print_PIC(void)
   15.16  	v = inb(0x4d1) << 8 | inb(0x4d0);
   15.17  	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
   15.18  }
   15.19 +
   15.20  #endif  /*  0  */
   15.21  
   15.22  #else
   15.23 @@ -1191,6 +1190,7 @@ void disable_IO_APIC(void)
   15.24  	 * Clear the IO-APIC before rebooting:
   15.25  	 */
   15.26  	clear_IO_APIC();
   15.27 +
   15.28  #ifndef CONFIG_XEN
   15.29  	disconnect_bsp_APIC();
   15.30  #endif
   15.31 @@ -1202,6 +1202,7 @@ void disable_IO_APIC(void)
   15.32   *
   15.33   * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
   15.34   */
   15.35 +
   15.36  #ifndef CONFIG_XEN
   15.37  static void __init setup_ioapic_ids_from_mpc (void)
   15.38  {
    16.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c	Fri Jan 13 10:38:44 2006 -0600
    16.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c	Fri Jan 13 14:12:24 2006 -0600
    16.3 @@ -9,18 +9,15 @@
    16.4   * x86_64-specific irq controller code. (e.g. i8259.c and
    16.5   * io_apic.c.)
    16.6   */
    16.7 -#include <asm/uaccess.h>
    16.8 -#include <linux/module.h>
    16.9 -#include <linux/seq_file.h>
   16.10 -#include <linux/interrupt.h>
   16.11 +
   16.12  #include <linux/kernel_stat.h>
   16.13 -
   16.14 -/*
   16.15 - * Interrupt statistics:
   16.16 - */
   16.17 +#include <linux/interrupt.h>
   16.18 +#include <linux/seq_file.h>
   16.19 +#include <linux/module.h>
   16.20 +#include <asm/uaccess.h>
   16.21 +#include <asm/io_apic.h>
   16.22  
   16.23  atomic_t irq_err_count;
   16.24 -
   16.25  #ifdef CONFIG_X86_IO_APIC
   16.26  #ifdef APIC_MISMATCH_DEBUG
   16.27  atomic_t irq_mis_count;
    17.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Jan 13 10:38:44 2006 -0600
    17.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Jan 13 14:12:24 2006 -0600
    17.3 @@ -62,6 +62,7 @@ static int alloc_ldt(mm_context_t *pc, u
    17.4  	if (reload) {
    17.5  #ifdef CONFIG_SMP
    17.6  		cpumask_t mask;
    17.7 +
    17.8  		preempt_disable();
    17.9  #endif
   17.10  		make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
   17.11 @@ -201,6 +202,7 @@ static int write_ldt(void __user * ptr, 
   17.12  	struct user_desc ldt_info;
   17.13  
   17.14  	error = -EINVAL;
   17.15 +
   17.16  	if (bytecount != sizeof(ldt_info))
   17.17  		goto out;
   17.18  	error = -EFAULT; 	
    18.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Fri Jan 13 10:38:44 2006 -0600
    18.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Fri Jan 13 14:12:24 2006 -0600
    18.3 @@ -62,6 +62,7 @@
    18.4  #include <asm-xen/xen-public/physdev.h>
    18.5  #include "setup_arch_pre.h"
    18.6  #include <asm/hypervisor.h>
    18.7 +#include <asm-xen/xen-public/nmi.h>
    18.8  #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
    18.9  #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
   18.10  #define end_pfn_map end_pfn
   18.11 @@ -304,7 +305,6 @@ static void __init probe_roms(void)
   18.12  }
   18.13  #endif
   18.14  
   18.15 -
   18.16  static __init void parse_cmdline_early (char ** cmdline_p)
   18.17  {
   18.18  	char c = ' ', *to = command_line, *from = COMMAND_LINE;
   18.19 @@ -379,6 +379,7 @@ static __init void parse_cmdline_early (
   18.20  			acpi_skip_timer_override = 1;
   18.21  #endif
   18.22  #endif
   18.23 +
   18.24  #ifndef CONFIG_XEN
   18.25  		if (!memcmp(from, "nolapic", 7) ||
   18.26  		    !memcmp(from, "disableapic", 11))
   18.27 @@ -391,7 +392,8 @@ static __init void parse_cmdline_early (
   18.28  			skip_ioapic_setup = 0;
   18.29  			ioapic_force = 1;
   18.30  		}
   18.31 -#endif			
   18.32 +#endif
   18.33 +			
   18.34  		if (!memcmp(from, "mem=", 4))
   18.35  			parse_memopt(from+4, &from); 
   18.36  
   18.37 @@ -588,7 +590,7 @@ void __init setup_arch(char **cmdline_p)
   18.38  	HYPERVISOR_vm_assist(VMASST_CMD_enable,
   18.39  			     VMASST_TYPE_writable_pagetables);
   18.40  
   18.41 -        ARCH_SETUP
   18.42 +	ARCH_SETUP
   18.43  #else
   18.44   	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
   18.45   	drive_info = DRIVE_INFO;
   18.46 @@ -612,7 +614,7 @@ void __init setup_arch(char **cmdline_p)
   18.47  	init_mm.end_code = (unsigned long) &_etext;
   18.48  	init_mm.end_data = (unsigned long) &_edata;
   18.49  #ifdef CONFIG_XEN
   18.50 -        init_mm.brk = start_pfn << PAGE_SHIFT;
   18.51 +	init_mm.brk = start_pfn << PAGE_SHIFT;
   18.52  #else
   18.53  	init_mm.brk = (unsigned long) &_end;	
   18.54  
   18.55 @@ -668,7 +670,6 @@ void __init setup_arch(char **cmdline_p)
   18.56  	reserve_ebda_region();
   18.57  #endif
   18.58  
   18.59 -
   18.60  #ifdef CONFIG_SMP
   18.61  	/*
   18.62  	 * But first pinch a few for the stack/trampoline stuff
   18.63 @@ -790,8 +791,6 @@ void __init setup_arch(char **cmdline_p)
   18.64  
   18.65  	}
   18.66  
   18.67 -
   18.68 -
   18.69  	if ( ! (xen_start_info->flags & SIF_INITDOMAIN))
   18.70  	{
   18.71  		acpi_disabled = 1;
   18.72 @@ -835,7 +834,7 @@ void __init setup_arch(char **cmdline_p)
   18.73  	 * and also for regions reported as reserved by the e820.
   18.74  	 */
   18.75  	probe_roms();
   18.76 -	e820_reserve_resources();
   18.77 +	e820_reserve_resources(); 
   18.78  #endif
   18.79  
   18.80  	request_resource(&iomem_resource, &video_ram_resource);
    19.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Fri Jan 13 10:38:44 2006 -0600
    19.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Fri Jan 13 14:12:24 2006 -0600
    19.3 @@ -33,6 +33,7 @@
    19.4  #ifdef CONFIG_XEN
    19.5  #include <asm/hypervisor.h>
    19.6  #endif
    19.7 +
    19.8  char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
    19.9  
   19.10  cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
   19.11 @@ -165,7 +166,6 @@ void __init cpu_gdt_init(struct desc_ptr
   19.12  }
   19.13  #endif
   19.14  
   19.15 -
   19.16  void pda_init(int cpu)
   19.17  { 
   19.18  	struct x8664_pda *pda = &cpu_pda[cpu];
   19.19 @@ -175,9 +175,10 @@ void pda_init(int cpu)
   19.20  #ifndef CONFIG_XEN
   19.21  	wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
   19.22  #else
   19.23 -        HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, 
   19.24 -                                    (unsigned long)(cpu_pda + cpu));
   19.25 +	HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, 
   19.26 +				    (unsigned long)(cpu_pda + cpu));
   19.27  #endif
   19.28 +
   19.29  	pda->me = pda;
   19.30  	pda->cpunumber = cpu; 
   19.31  	pda->irqcount = -1;
   19.32 @@ -201,6 +202,7 @@ void pda_init(int cpu)
   19.33  	}
   19.34  
   19.35  	switch_pt();
   19.36 +
   19.37  	pda->irqstackptr += IRQSTACKSIZE-64;
   19.38  } 
   19.39  
    20.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Fri Jan 13 10:38:44 2006 -0600
    20.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Fri Jan 13 14:12:24 2006 -0600
    20.3 @@ -30,8 +30,9 @@
    20.4  #include <asm/apicdef.h>
    20.5  #ifdef CONFIG_XEN
    20.6  #include <asm-xen/evtchn.h>
    20.7 +#endif
    20.8  
    20.9 -#else
   20.10 +#ifndef CONFIG_XEN
   20.11  /*
   20.12   *	Smarter SMP flushing macros. 
   20.13   *		c/o Linus Torvalds.
    21.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Fri Jan 13 10:38:44 2006 -0600
    21.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Fri Jan 13 14:12:24 2006 -0600
    21.3 @@ -559,9 +559,11 @@ static void mem_parity_error(unsigned ch
    21.4  	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
    21.5  	printk("You probably have a hardware problem with your RAM chips\n");
    21.6  
    21.7 +#if 0 /* XEN */
    21.8  	/* Clear and disable the memory parity error line. */
    21.9  	reason = (reason & 0xf) | 4;
   21.10  	outb(reason, 0x61);
   21.11 +#endif /* XEN */
   21.12  }
   21.13  
   21.14  static void io_check_error(unsigned char reason, struct pt_regs * regs)
   21.15 @@ -569,12 +571,14 @@ static void io_check_error(unsigned char
   21.16  	printk("NMI: IOCK error (debug interrupt?)\n");
   21.17  	show_registers(regs);
   21.18  
   21.19 +#if 0 /* XEN */
   21.20  	/* Re-enable the IOCK line, wait for a few seconds */
   21.21  	reason = (reason & 0xf) | 8;
   21.22  	outb(reason, 0x61);
   21.23  	mdelay(2000);
   21.24  	reason &= ~8;
   21.25  	outb(reason, 0x61);
   21.26 +#endif /* XEN */
   21.27  }
   21.28  
   21.29  static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
   21.30 @@ -890,7 +894,6 @@ asmlinkage void __attribute__((weak)) sm
   21.31  asmlinkage void math_state_restore(void)
   21.32  {
   21.33  	struct task_struct *me = current;
   21.34 -        
   21.35          /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
   21.36  
   21.37  	if (!used_math())
    22.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c	Fri Jan 13 10:38:44 2006 -0600
    22.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c	Fri Jan 13 14:12:24 2006 -0600
    22.3 @@ -59,9 +59,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
    22.4  EXPORT_SYMBOL(probe_irq_mask);
    22.5  EXPORT_SYMBOL(kernel_thread);
    22.6  EXPORT_SYMBOL(pm_idle);
    22.7 -#ifdef CONFIG_ACPI_BOOT
    22.8  EXPORT_SYMBOL(pm_power_off);
    22.9 -#endif
   22.10  EXPORT_SYMBOL(get_cmos_time);
   22.11  
   22.12  EXPORT_SYMBOL(__down_failed);
    23.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Fri Jan 13 10:38:44 2006 -0600
    23.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Fri Jan 13 14:12:24 2006 -0600
    23.3 @@ -153,7 +153,6 @@ void dump_pagetable(unsigned long addres
    23.4  	pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
    23.5  	preempt_enable();
    23.6  	pgd += pgd_index(address);
    23.7 -
    23.8  	printk("PGD %lx ", pgd_val(*pgd));
    23.9  	if (bad_address(pgd)) goto bad;
   23.10  	if (!pgd_present(*pgd)) goto ret; 
   23.11 @@ -258,7 +257,6 @@ static int vmalloc_fault(unsigned long a
   23.12  	pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
   23.13  	preempt_enable();
   23.14  	pgd += pgd_index(address);
   23.15 -
   23.16  	pgd_ref = pgd_offset_k(address);
   23.17  	if (pgd_none(*pgd_ref))
   23.18  		return -1;
    24.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Jan 13 10:38:44 2006 -0600
    24.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Jan 13 14:12:24 2006 -0600
    24.3 @@ -40,16 +40,16 @@
    24.4  #include <asm/proto.h>
    24.5  #include <asm/smp.h>
    24.6  
    24.7 +#ifndef Dprintk
    24.8 +#define Dprintk(x...)
    24.9 +#endif
   24.10 +
   24.11  extern unsigned long *contiguous_bitmap;
   24.12  
   24.13  #if defined(CONFIG_SWIOTLB)
   24.14  extern void swiotlb_init(void);
   24.15  #endif
   24.16  
   24.17 -#ifndef Dprintk
   24.18 -#define Dprintk(x...)
   24.19 -#endif
   24.20 -
   24.21  extern char _stext[];
   24.22  
   24.23  DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
   24.24 @@ -200,9 +200,9 @@ static void *spp_getpage(void)
   24.25  
   24.26  static inline pud_t *pud_offset_u(unsigned long address)
   24.27  {
   24.28 -        pud_t *pud = level3_user_pgt;
   24.29 +	pud_t *pud = level3_user_pgt;
   24.30  
   24.31 -        return pud + pud_index(address);
   24.32 +	return pud + pud_index(address);
   24.33  }
   24.34  
   24.35  static void set_pte_phys(unsigned long vaddr,
   24.36 @@ -215,34 +215,27 @@ static void set_pte_phys(unsigned long v
   24.37  
   24.38  	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
   24.39  
   24.40 -        pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
   24.41 -
   24.42 +	pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
   24.43  	if (pgd_none(*pgd)) {
   24.44  		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
   24.45  		return;
   24.46  	}
   24.47 -        
   24.48 -        pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
   24.49 -
   24.50 +	pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
   24.51  	if (pud_none(*pud)) {
   24.52  		pmd = (pmd_t *) spp_getpage(); 
   24.53 -
   24.54 -                make_page_readonly(pmd);
   24.55 -                xen_pmd_pin(__pa(pmd));
   24.56 +		make_page_readonly(pmd);
   24.57 +		xen_pmd_pin(__pa(pmd));
   24.58  		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
   24.59  		if (pmd != pmd_offset(pud, 0)) {
   24.60  			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
   24.61  			return;
   24.62  		}
   24.63  	}
   24.64 -
   24.65  	pmd = pmd_offset(pud, vaddr);
   24.66 -
   24.67  	if (pmd_none(*pmd)) {
   24.68  		pte = (pte_t *) spp_getpage();
   24.69 -                make_page_readonly(pte);
   24.70 -
   24.71 -                xen_pte_pin(__pa(pte));
   24.72 +		make_page_readonly(pte);
   24.73 +		xen_pte_pin(__pa(pte));
   24.74  		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
   24.75  		if (pte != pte_offset_kernel(pmd, 0)) {
   24.76  			printk("PAGETABLE BUG #02!\n");
   24.77 @@ -252,11 +245,10 @@ static void set_pte_phys(unsigned long v
   24.78  	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
   24.79  
   24.80  	pte = pte_offset_kernel(pmd, vaddr);
   24.81 -
   24.82  	if (!pte_none(*pte) &&
   24.83  	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
   24.84  		pte_ERROR(*pte);
   24.85 -        set_pte(pte, new_pte);
   24.86 +	set_pte(pte, new_pte);
   24.87  
   24.88  	/*
   24.89  	 * It's enough to flush this one mapping.
   24.90 @@ -284,11 +276,11 @@ static void set_pte_phys_ma(unsigned lon
   24.91  	if (pud_none(*pud)) {
   24.92  
   24.93  		pmd = (pmd_t *) spp_getpage(); 
   24.94 -                make_page_readonly(pmd);
   24.95 -                xen_pmd_pin(__pa(pmd));
   24.96 +		make_page_readonly(pmd);
   24.97 +		xen_pmd_pin(__pa(pmd));
   24.98  
   24.99  		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
  24.100 -         
  24.101 +
  24.102  		if (pmd != pmd_offset(pud, 0)) {
  24.103  			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
  24.104  			return;
  24.105 @@ -298,8 +290,8 @@ static void set_pte_phys_ma(unsigned lon
  24.106  
  24.107  	if (pmd_none(*pmd)) {
  24.108  		pte = (pte_t *) spp_getpage();
  24.109 -                make_page_readonly(pte);  
  24.110 -                xen_pte_pin(__pa(pte));
  24.111 +		make_page_readonly(pte);  
  24.112 +		xen_pte_pin(__pa(pte));
  24.113  
  24.114  		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
  24.115  		if (pte != pte_offset_kernel(pmd, 0)) {
  24.116 @@ -311,12 +303,12 @@ static void set_pte_phys_ma(unsigned lon
  24.117  	new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
  24.118  	pte = pte_offset_kernel(pmd, vaddr);
  24.119  
  24.120 -        /* 
  24.121 -         * Note that the pte page is already RO, thus we want to use
  24.122 -         * xen_l1_entry_update(), not set_pte().
  24.123 -         */
  24.124 -        xen_l1_entry_update(pte, 
  24.125 -                            pfn_pte_ma(phys >> PAGE_SHIFT, prot));
  24.126 +	/* 
  24.127 +	 * Note that the pte page is already RO, thus we want to use
  24.128 +	 * xen_l1_entry_update(), not set_pte().
  24.129 +	 */
  24.130 +	xen_l1_entry_update(pte, 
  24.131 +			    pfn_pte_ma(phys >> PAGE_SHIFT, prot));
  24.132  
  24.133  	/*
  24.134  	 * It's enough to flush this one mapping.
  24.135 @@ -347,7 +339,6 @@ void __set_fixmap (enum fixed_addresses 
  24.136  	}
  24.137  }
  24.138  
  24.139 -
  24.140  /*
  24.141   * At this point it only supports vsyscall area.
  24.142   */
  24.143 @@ -360,18 +351,18 @@ void __set_fixmap_user (enum fixed_addre
  24.144  		return;
  24.145  	}
  24.146  
  24.147 -        set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
  24.148 +	set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
  24.149  }
  24.150  
  24.151  unsigned long __initdata table_start, tables_space; 
  24.152  
  24.153  unsigned long get_machine_pfn(unsigned long addr)
  24.154  {
  24.155 -        pud_t* pud = pud_offset_k(addr);
  24.156 -        pmd_t* pmd = pmd_offset(pud, addr);
  24.157 -        pte_t *pte = pte_offset_kernel(pmd, addr);
  24.158 -        
  24.159 -        return pte_mfn(*pte);
  24.160 +	pud_t* pud = pud_offset_k(addr);
  24.161 +	pmd_t* pmd = pmd_offset(pud, addr);
  24.162 +	pte_t *pte = pte_offset_kernel(pmd, addr);
  24.163 +
  24.164 +	return pte_mfn(*pte);
  24.165  } 
  24.166  
  24.167  static __init void *alloc_static_page(unsigned long *phys)
  24.168 @@ -411,12 +402,11 @@ static inline int make_readonly(unsigned
  24.169  
  24.170  static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
  24.171  { 
  24.172 -        long i, j, k; 
  24.173 -        unsigned long paddr;
  24.174 +	long i, j, k; 
  24.175 +	unsigned long paddr;
  24.176  
  24.177  	i = pud_index(address);
  24.178  	pud = pud + i;
  24.179 -
  24.180  	for (; i < PTRS_PER_PUD; pud++, i++) {
  24.181  		unsigned long pmd_phys;
  24.182  		pmd_t *pmd;
  24.183 @@ -429,38 +419,37 @@ static void __init phys_pud_init(pud_t *
  24.184  		} 
  24.185  
  24.186  		pmd = alloc_static_page(&pmd_phys);
  24.187 -                early_make_page_readonly(pmd);
  24.188 -                xen_pmd_pin(pmd_phys);
  24.189 +		early_make_page_readonly(pmd);
  24.190 +		xen_pmd_pin(pmd_phys);
  24.191  		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
  24.192 -
  24.193        		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
  24.194 -                        unsigned long pte_phys;
  24.195 -                        pte_t *pte, *pte_save;
  24.196 +			unsigned long pte_phys;
  24.197 +			pte_t *pte, *pte_save;
  24.198  
  24.199  			if (paddr >= end) { 
  24.200  				for (; j < PTRS_PER_PMD; j++, pmd++)
  24.201  					set_pmd(pmd,  __pmd(0)); 
  24.202  				break;
  24.203  			}
  24.204 -                        pte = alloc_static_page(&pte_phys);
  24.205 -                        pte_save = pte;
  24.206 -                        for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
  24.207 -                                if ((paddr >= end) ||
  24.208 -                                    ((paddr >> PAGE_SHIFT) >=
  24.209 -                                     xen_start_info->nr_pages)) { 
  24.210 -                                        __set_pte(pte, __pte(0)); 
  24.211 -                                        continue;
  24.212 -                                }
  24.213 -                                if (make_readonly(paddr)) {
  24.214 -                                        __set_pte(pte, 
  24.215 -                                                __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
  24.216 -                                        continue;
  24.217 -                                }
  24.218 -                                __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
  24.219 -                        }
  24.220 -                        pte = pte_save;
  24.221 -                        early_make_page_readonly(pte);  
  24.222 -                        xen_pte_pin(pte_phys);
  24.223 +			pte = alloc_static_page(&pte_phys);
  24.224 +			pte_save = pte;
  24.225 +			for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
  24.226 +				if ((paddr >= end) ||
  24.227 +				    ((paddr >> PAGE_SHIFT) >=
  24.228 +				     xen_start_info->nr_pages)) { 
  24.229 +					__set_pte(pte, __pte(0)); 
  24.230 +					continue;
  24.231 +				}
  24.232 +				if (make_readonly(paddr)) {
  24.233 +					__set_pte(pte, 
  24.234 +						__pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
  24.235 +					continue;
  24.236 +				}
  24.237 +				__set_pte(pte, __pte(paddr | _KERNPG_TABLE));
  24.238 +			}
  24.239 +			pte = pte_save;
  24.240 +			early_make_page_readonly(pte);  
  24.241 +			xen_pte_pin(pte_phys);
  24.242  			set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
  24.243  		}
  24.244  	}
  24.245 @@ -506,7 +495,7 @@ void __init xen_init_pt(void)
  24.246  	level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
  24.247  		__pud(__pa_symbol(level2_kernel_pgt) |
  24.248  		      _KERNPG_TABLE | _PAGE_USER);
  24.249 -        memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
  24.250 +	memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
  24.251  
  24.252  	early_make_page_readonly(init_level4_pgt);
  24.253  	early_make_page_readonly(init_level4_user_pgt);
  24.254 @@ -618,7 +607,7 @@ extern struct x8664_pda cpu_pda[NR_CPUS]
  24.255  
  24.256  void zap_low_mappings(void)
  24.257  {
  24.258 -        /* this is not required for Xen */
  24.259 +	/* this is not required for Xen */
  24.260  #if 0
  24.261  	swap_low_mappings();
  24.262  #endif
  24.263 @@ -629,11 +618,11 @@ void __init paging_init(void)
  24.264  {
  24.265  	{
  24.266  		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
  24.267 -                /*	unsigned int max_dma; */
  24.268 -                /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
  24.269 -                /* if (end_pfn < max_dma) */
  24.270 +		/*	unsigned int max_dma; */
  24.271 +		/* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
  24.272 +		/* if (end_pfn < max_dma) */
  24.273  			zones_size[ZONE_DMA] = end_pfn;
  24.274 -#if 0                
  24.275 +#if 0
  24.276  		else {
  24.277  			zones_size[ZONE_DMA] = max_dma;
  24.278  			zones_size[ZONE_NORMAL] = end_pfn - max_dma;
  24.279 @@ -642,16 +631,16 @@ void __init paging_init(void)
  24.280  		free_area_init(zones_size);
  24.281  	}
  24.282  
  24.283 -        set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
  24.284 -        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
  24.285 +	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
  24.286 +	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
  24.287  
  24.288 -        memset(empty_zero_page, 0, sizeof(empty_zero_page));
  24.289 +	memset(empty_zero_page, 0, sizeof(empty_zero_page));
  24.290  	init_mm.context.pinned = 1;
  24.291  
  24.292  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
  24.293  	{
  24.294  		int i;
  24.295 -        /* Setup mapping of lower 1st MB */
  24.296 +		/* Setup mapping of lower 1st MB */
  24.297  		for (i = 0; i < NR_FIX_ISAMAPS; i++)
  24.298  			if (xen_start_info->flags & SIF_PRIVILEGED)
  24.299  				set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
  24.300 @@ -701,7 +690,7 @@ void __init clear_kernel_mapping(unsigne
  24.301  
  24.302  static inline int page_is_ram (unsigned long pagenr)
  24.303  {
  24.304 -        return 1;
  24.305 +	return 1;
  24.306  }
  24.307  
  24.308  static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
  24.309 @@ -790,10 +779,10 @@ extern char __initdata_begin[], __initda
  24.310  void free_initmem(void)
  24.311  {
  24.312  #ifdef __DO_LATER__
  24.313 -        /*
  24.314 -         * Some pages can be pinned, but some are not. Unpinning such pages 
  24.315 -         * triggers BUG(). 
  24.316 -         */
  24.317 +	/*
  24.318 +	 * Some pages can be pinned, but some are not. Unpinning such pages 
  24.319 +	 * triggers BUG(). 
  24.320 +	 */
  24.321  	unsigned long addr;
  24.322  
  24.323  	addr = (unsigned long)(&__init_begin);
  24.324 @@ -801,12 +790,12 @@ void free_initmem(void)
  24.325  		ClearPageReserved(virt_to_page(addr));
  24.326  		set_page_count(virt_to_page(addr), 1);
  24.327  		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 
  24.328 -                xen_pte_unpin(__pa(addr));
  24.329 -                make_page_writable(__va(__pa(addr)));
  24.330 -                /*
  24.331 -                 * Make pages from __PAGE_OFFSET address as well
  24.332 -                 */
  24.333 -                make_page_writable((void *)addr);
  24.334 +		xen_pte_unpin(__pa(addr));
  24.335 +		make_page_writable(__va(__pa(addr)));
  24.336 +		/*
  24.337 +		 * Make pages from __PAGE_OFFSET address as well
  24.338 +		 */
  24.339 +		make_page_writable((void *)addr);
  24.340  		free_page(addr);
  24.341  		totalram_pages++;
  24.342  	}
  24.343 @@ -856,7 +845,7 @@ int kern_addr_valid(unsigned long addr)
  24.344  	if (pgd_none(*pgd))
  24.345  		return 0;
  24.346  
  24.347 -        pud = pud_offset_k(addr);
  24.348 +	pud = pud_offset_k(addr);
  24.349  	if (pud_none(*pud))
  24.350  		return 0; 
  24.351  
    25.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri Jan 13 10:38:44 2006 -0600
    25.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri Jan 13 14:12:24 2006 -0600
    25.3 @@ -354,7 +354,7 @@ static void watch_target(struct xenbus_w
    25.4  	unsigned long long new_target;
    25.5  	int err;
    25.6  
    25.7 -	err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
    25.8 +	err = xenbus_scanf(XBT_NULL, "memory", "target", "%llu", &new_target);
    25.9  	if (err != 1) {
   25.10  		/* This is ok (for domain0 at least) - so just return */
   25.11  		return;
    26.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Jan 13 10:38:44 2006 -0600
    26.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Jan 13 14:12:24 2006 -0600
    26.3 @@ -540,6 +540,9 @@ static int __init blkif_init(void)
    26.4  	pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
    26.5  					mmap_pages, GFP_KERNEL);
    26.6  	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
    26.7 +		kfree(pending_reqs);
    26.8 +		kfree(pending_grant_handles);
    26.9 +		kfree(pending_vaddrs);
   26.10  		printk("%s: out of memory\n", __FUNCTION__);
   26.11  		return -1;
   26.12  	}
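
The three kfree() calls added here plug a leak on the partial-failure path: if any of the three kmalloc() calls fails, all three pointers are released before blkif_init() bails out. The idiom works because kfree(NULL) is defined to be a no-op, so pointers that were never successfully allocated can be freed unconditionally. A minimal sketch of the same pattern (hypothetical helper, kernel context):

    #include <linux/slab.h>

    /* Allocate three buffers; on any failure, free whatever succeeded.
     * kfree(NULL) is a no-op, so no per-pointer checks are needed. */
    static int alloc_three(void **a, void **b, void **c, size_t n)
    {
            *a = kmalloc(n, GFP_KERNEL);
            *b = kmalloc(n, GFP_KERNEL);
            *c = kmalloc(n, GFP_KERNEL);
            if (!*a || !*b || !*c) {
                    kfree(*a);
                    kfree(*b);
                    kfree(*c);
                    return -ENOMEM;
            }
            return 0;
    }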
    27.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri Jan 13 10:38:44 2006 -0600
    27.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri Jan 13 14:12:24 2006 -0600
    27.3 @@ -145,7 +145,7 @@ static int blkback_probe(struct xenbus_d
    27.4  	if (err)
    27.5  		goto fail;
    27.6  
    27.7 -	err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
    27.8 +	err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
    27.9  	if (err)
   27.10  		goto fail;
   27.11  
   27.12 @@ -175,7 +175,7 @@ static void backend_changed(struct xenbu
   27.13  
   27.14  	DPRINTK("");
   27.15  
   27.16 -	err = xenbus_scanf(NULL, dev->nodename, "physical-device", "%x:%x",
   27.17 +	err = xenbus_scanf(XBT_NULL, dev->nodename, "physical-device", "%x:%x",
   27.18  			   &major, &minor);
   27.19  	if (XENBUS_EXIST_ERR(err)) {
   27.20  		/* Since this watch will fire once immediately after it is
   27.21 @@ -197,7 +197,7 @@ static void backend_changed(struct xenbu
   27.22  		return;
   27.23  	}
   27.24  
   27.25 -	be->mode = xenbus_read(NULL, dev->nodename, "mode", NULL);
   27.26 +	be->mode = xenbus_read(XBT_NULL, dev->nodename, "mode", NULL);
   27.27  	if (IS_ERR(be->mode)) {
   27.28  		err = PTR_ERR(be->mode);
   27.29  		be->mode = NULL;
   27.30 @@ -268,7 +268,7 @@ static void frontend_changed(struct xenb
   27.31  		break;
   27.32  
   27.33  	case XenbusStateClosing:
   27.34 -		xenbus_switch_state(dev, NULL, XenbusStateClosing);
   27.35 +		xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
   27.36  		break;
   27.37  
   27.38  	case XenbusStateClosed:
   27.39 @@ -302,7 +302,7 @@ static void maybe_connect(struct backend
   27.40   */
   27.41  static void connect(struct backend_info *be)
   27.42  {
   27.43 -	struct xenbus_transaction *xbt;
   27.44 +	xenbus_transaction_t xbt;
   27.45  	int err;
   27.46  	struct xenbus_device *dev = be->dev;
   27.47  
   27.48 @@ -310,10 +310,9 @@ static void connect(struct backend_info 
   27.49  
   27.50  	/* Supply the information about the device the frontend needs */
   27.51  again:
   27.52 -	xbt = xenbus_transaction_start();
   27.53 +	err = xenbus_transaction_start(&xbt);
   27.54  
   27.55 -	if (IS_ERR(xbt)) {
   27.56 -		err = PTR_ERR(xbt);
   27.57 +	if (err) {
   27.58  		xenbus_dev_fatal(dev, err, "starting transaction");
   27.59  		return;
   27.60  	}
   27.61 @@ -366,7 +365,7 @@ static int connect_ring(struct backend_i
   27.62  
   27.63  	DPRINTK("%s", dev->otherend);
   27.64  
   27.65 -	err = xenbus_gather(NULL, dev->otherend, "ring-ref", "%lu", &ring_ref,
   27.66 +	err = xenbus_gather(XBT_NULL, dev->otherend, "ring-ref", "%lu", &ring_ref,
   27.67  			    "event-channel", "%u", &evtchn, NULL);
   27.68  	if (err) {
   27.69  		xenbus_dev_fatal(dev, err,
    28.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Jan 13 10:38:44 2006 -0600
    28.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Jan 13 14:12:24 2006 -0600
    28.3 @@ -91,7 +91,7 @@ static int blkfront_probe(struct xenbus_
    28.4  	struct blkfront_info *info;
    28.5  
    28.6  	/* FIXME: Use dynamic device id if this is not set. */
    28.7 -	err = xenbus_scanf(NULL, dev->nodename,
    28.8 +	err = xenbus_scanf(XBT_NULL, dev->nodename,
    28.9  			   "virtual-device", "%i", &vdevice);
   28.10  	if (err != 1) {
   28.11  		xenbus_dev_fatal(dev, err, "reading virtual-device");
   28.12 @@ -161,7 +161,7 @@ static int talk_to_backend(struct xenbus
   28.13  			   struct blkfront_info *info)
   28.14  {
   28.15  	const char *message = NULL;
   28.16 -	struct xenbus_transaction *xbt;
   28.17 +	xenbus_transaction_t xbt;
   28.18  	int err;
   28.19  
   28.20  	/* Create shared ring, alloc event channel. */
   28.21 @@ -170,8 +170,8 @@ static int talk_to_backend(struct xenbus
   28.22  		goto out;
   28.23  
   28.24  again:
   28.25 -	xbt = xenbus_transaction_start();
   28.26 -	if (IS_ERR(xbt)) {
   28.27 +	err = xenbus_transaction_start(&xbt);
   28.28 +	if (err) {
   28.29  		xenbus_dev_fatal(dev, err, "starting transaction");
   28.30  		goto destroy_blkring;
   28.31  	}
   28.32 @@ -319,7 +319,7 @@ static void connect(struct blkfront_info
   28.33  
   28.34  	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
   28.35  
   28.36 -	err = xenbus_gather(NULL, info->xbdev->otherend,
   28.37 +	err = xenbus_gather(XBT_NULL, info->xbdev->otherend,
   28.38  			    "sectors", "%lu", &sectors,
   28.39  			    "info", "%u", &binfo,
   28.40  			    "sector-size", "%lu", &sector_size,
   28.41 @@ -338,7 +338,7 @@ static void connect(struct blkfront_info
   28.42  		return;
   28.43  	}
   28.44  
   28.45 -	(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
   28.46 +	(void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); 
   28.47  
   28.48  	/* Kick pending requests. */
   28.49  	spin_lock_irq(&blkif_io_lock);
   28.50 @@ -367,7 +367,7 @@ static void blkfront_closing(struct xenb
   28.51  		info->mi = NULL;
   28.52  	}
   28.53  
   28.54 -	xenbus_switch_state(dev, NULL, XenbusStateClosed);
   28.55 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
   28.56  }
   28.57  
   28.58  
   28.59 @@ -775,7 +775,7 @@ static void blkif_recover(struct blkfron
   28.60  
   28.61  	kfree(copy);
   28.62  
   28.63 -	(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
   28.64 +	(void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); 
   28.65  	
   28.66  	/* Now safe for us to use the shared ring */
   28.67  	spin_lock_irq(&blkif_io_lock);
    29.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Jan 13 10:38:44 2006 -0600
    29.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Jan 13 14:12:24 2006 -0600
    29.3 @@ -208,7 +208,7 @@ static struct page *blktap_nopage(struct
    29.4  }
    29.5  
    29.6  struct vm_operations_struct blktap_vm_ops = {
    29.7 -	nopage:   blktap_nopage,
    29.8 +	.nopage = blktap_nopage,
    29.9  };
   29.10  
   29.11  /******************************************************************
   29.12 @@ -225,7 +225,7 @@ static int blktap_open(struct inode *ino
   29.13  	/* Allocate the fe ring. */
   29.14  	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   29.15  	if (sring == NULL)
   29.16 -		goto fail_nomem;
   29.17 +		return -ENOMEM;
   29.18  
   29.19  	SetPageReserved(virt_to_page(sring));
   29.20      
   29.21 @@ -233,9 +233,6 @@ static int blktap_open(struct inode *ino
   29.22  	FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
   29.23  
   29.24  	return 0;
   29.25 -
   29.26 - fail_nomem:
   29.27 -	return -ENOMEM;
   29.28  }
   29.29  
   29.30  static int blktap_release(struct inode *inode, struct file *filp)
   29.31 @@ -391,12 +388,12 @@ void blktap_kick_user(void)
   29.32  }
   29.33  
   29.34  static struct file_operations blktap_fops = {
   29.35 -	owner:    THIS_MODULE,
   29.36 -	poll:     blktap_poll,
   29.37 -	ioctl:    blktap_ioctl,
   29.38 -	open:     blktap_open,
   29.39 -	release:  blktap_release,
   29.40 -	mmap:     blktap_mmap,
   29.41 +	.owner   = THIS_MODULE,
   29.42 +	.poll    = blktap_poll,
   29.43 +	.ioctl   = blktap_ioctl,
   29.44 +	.open    = blktap_open,
   29.45 +	.release = blktap_release,
   29.46 +	.mmap    = blktap_mmap,
   29.47  };
   29.48  
   29.49  
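The blktap hunks above also modernize the initializer syntax: the old `label:` form for struct members is a deprecated GNU extension, while `.field =` is standard C99 with identical semantics. A standalone illustration, using a hypothetical struct and functions:

    /* Both initializers set the same members; only the C99 form is portable. */
    struct ops {
            int (*open)(void);
            int (*close)(void);
    };

    static int my_open(void)  { return 0; }
    static int my_close(void) { return 0; }

    static struct ops legacy = { open: my_open, close: my_close };     /* GNU extension, deprecated */
    static struct ops modern = { .open = my_open, .close = my_close }; /* C99 designated initializers */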
    30.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Fri Jan 13 10:38:44 2006 -0600
    30.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Fri Jan 13 14:12:24 2006 -0600
    30.3 @@ -314,39 +314,31 @@ static void __xencons_tx_flush(void)
    30.4  {
    30.5  	int sent, sz, work_done = 0;
    30.6  
    30.7 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
    30.8 -		if (x_char) {
    30.9 +	if (x_char) {
   30.10 +		if (xen_start_info->flags & SIF_INITDOMAIN)
   30.11  			kcons_write_dom0(NULL, &x_char, 1);
   30.12 -			x_char = 0;
   30.13 -			work_done = 1;
   30.14 -		}
   30.15 +		else
   30.16 +			while (x_char)
   30.17 +				if (xencons_ring_send(&x_char, 1) == 1)
   30.18 +					break;
   30.19 +		x_char = 0;
   30.20 +		work_done = 1;
   30.21 +	}
   30.22  
   30.23 -		while (wc != wp) {
   30.24 -			sz = wp - wc;
   30.25 -			if (sz > (wbuf_size - WBUF_MASK(wc)))
   30.26 -				sz = wbuf_size - WBUF_MASK(wc);
   30.27 +	while (wc != wp) {
   30.28 +		sz = wp - wc;
   30.29 +		if (sz > (wbuf_size - WBUF_MASK(wc)))
   30.30 +			sz = wbuf_size - WBUF_MASK(wc);
   30.31 +		if (xen_start_info->flags & SIF_INITDOMAIN) {
   30.32  			kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
   30.33  			wc += sz;
   30.34 -			work_done = 1;
   30.35 -		}
   30.36 -	} else {
   30.37 -		while (x_char) {
   30.38 -			if (xencons_ring_send(&x_char, 1) == 1) {
   30.39 -				x_char = 0;
   30.40 -				work_done = 1;
   30.41 -			}
   30.42 -		}
   30.43 -
   30.44 -		while (wc != wp) {
   30.45 -			sz = wp - wc;
   30.46 -			if (sz > (wbuf_size - WBUF_MASK(wc)))
   30.47 -				sz = wbuf_size - WBUF_MASK(wc);
   30.48 +		} else {
   30.49  			sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
   30.50  			if (sent == 0)
   30.51  				break;
   30.52  			wc += sent;
   30.53 -			work_done = 1;
   30.54  		}
   30.55 +		work_done = 1;
   30.56  	}
   30.57  
   30.58  	if (work_done && (xencons_tty != NULL)) {
    31.1 --- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c	Fri Jan 13 10:38:44 2006 -0600
    31.2 +++ b/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c	Fri Jan 13 14:12:24 2006 -0600
    31.3 @@ -38,7 +38,7 @@ int xen_net_read_mac(struct xenbus_devic
    31.4  	char *s;
    31.5  	int i;
    31.6  	char *e;
    31.7 -	char *macstr = xenbus_read(NULL, dev->nodename, "mac", NULL);
    31.8 +	char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
    31.9  	if (IS_ERR(macstr)) {
   31.10  		return PTR_ERR(macstr);
   31.11  	}
    32.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Fri Jan 13 10:38:44 2006 -0600
    32.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Fri Jan 13 14:12:24 2006 -0600
    32.3 @@ -183,7 +183,7 @@ static void unmap_frontend_pages(netif_t
    32.4  int netif_map(netif_t *netif, unsigned long tx_ring_ref,
    32.5  	      unsigned long rx_ring_ref, unsigned int evtchn)
    32.6  {
    32.7 -	int err;
    32.8 +	int err = -ENOMEM;
    32.9  	netif_tx_sring_t *txs;
   32.10  	netif_rx_sring_t *rxs;
   32.11  	evtchn_op_t op = {
   32.12 @@ -199,25 +199,16 @@ int netif_map(netif_t *netif, unsigned l
   32.13  	if (netif->tx_comms_area == NULL)
   32.14  		return -ENOMEM;
   32.15  	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
   32.16 -	if (netif->rx_comms_area == NULL) {
   32.17 -		free_vm_area(netif->tx_comms_area);
   32.18 -		return -ENOMEM;
   32.19 -	}
   32.20 +	if (netif->rx_comms_area == NULL)
   32.21 +		goto err_rx;
   32.22  
   32.23  	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
   32.24 -	if (err) {
   32.25 -		free_vm_area(netif->tx_comms_area);
   32.26 -		free_vm_area(netif->rx_comms_area);
   32.27 -		return err;
   32.28 -	}
   32.29 +	if (err)
   32.30 +		goto err_map;
   32.31  
   32.32  	err = HYPERVISOR_event_channel_op(&op);
   32.33 -	if (err) {
   32.34 -		unmap_frontend_pages(netif);
   32.35 -		free_vm_area(netif->tx_comms_area);
   32.36 -		free_vm_area(netif->rx_comms_area);
   32.37 -		return err;
   32.38 -	}
   32.39 +	if (err)
   32.40 +		goto err_hypervisor;
   32.41  
   32.42  	netif->evtchn = op.u.bind_interdomain.local_port;
   32.43  
   32.44 @@ -245,6 +236,13 @@ int netif_map(netif_t *netif, unsigned l
   32.45  	rtnl_unlock();
   32.46  
   32.47  	return 0;
   32.48 +err_hypervisor:
   32.49 +	unmap_frontend_pages(netif);
   32.50 +err_map:
   32.51 +	free_vm_area(netif->rx_comms_area);
   32.52 +err_rx:
   32.53 +	free_vm_area(netif->tx_comms_area);
   32.54 +	return err;
   32.55  }
   32.56  
   32.57  static void free_netif_callback(void *arg)
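
netif_map() previously repeated its cleanup code in every failure branch; the rewrite collapses that into a single goto-based unwind ladder, the usual kernel error-handling idiom: each label releases exactly the resources acquired before the failing step, in reverse order. A stripped-down sketch of the shape, with hypothetical resources standing in for the vm_areas and the event-channel bind:

    #include <errno.h>
    #include <stdlib.h>

    static void *a, *b;
    static void *acquire(void)    { return malloc(1); }
    static void release(void *p)  { free(p); }
    static int bind_channel(void) { return 0; }  /* stand-in for the hypercall */

    static int setup(void)
    {
            int err = -ENOMEM;

            a = acquire();
            if (a == NULL)
                    return err;             /* nothing to unwind yet */
            b = acquire();
            if (b == NULL)
                    goto err_b;             /* only a to release */
            err = bind_channel();
            if (err)
                    goto err_bind;          /* b and a to release */
            return 0;

    err_bind:
            release(b);
    err_b:
            release(a);
            return err;
    }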
    33.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Jan 13 10:38:44 2006 -0600
    33.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Jan 13 14:12:24 2006 -0600
    33.3 @@ -14,6 +14,7 @@
    33.4  #include <asm-xen/balloon.h>
    33.5  #include <asm-xen/xen-public/memory.h>
    33.6  
    33.7 +/*#define NETBE_DEBUG_INTERRUPT*/
    33.8  
    33.9  static void netif_idx_release(u16 pending_idx);
   33.10  static void netif_page_release(struct page *page);
   33.11 @@ -727,6 +728,7 @@ static int make_rx_response(netif_t *net
   33.12  	return notify;
   33.13  }
   33.14  
   33.15 +#ifdef NETBE_DEBUG_INTERRUPT
   33.16  static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
   33.17  {
   33.18  	struct list_head *ent;
   33.19 @@ -758,6 +760,7 @@ static irqreturn_t netif_be_dbg(int irq,
   33.20  
   33.21  	return IRQ_HANDLED;
   33.22  }
   33.23 +#endif
   33.24  
   33.25  static int __init netback_init(void)
   33.26  {
   33.27 @@ -794,6 +797,7 @@ static int __init netback_init(void)
   33.28  
   33.29  	netif_xenbus_init();
   33.30  
   33.31 +#ifdef NETBE_DEBUG_INTERRUPT
   33.32  	(void)bind_virq_to_irqhandler(
   33.33  		VIRQ_DEBUG,
   33.34  		0,
   33.35 @@ -801,6 +805,7 @@ static int __init netback_init(void)
   33.36  		SA_SHIRQ, 
   33.37  		"net-be-dbg",
   33.38  		&netif_be_dbg);
   33.39 +#endif
   33.40  
   33.41  	return 0;
   33.42  }
    34.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri Jan 13 10:38:44 2006 -0600
    34.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri Jan 13 14:12:24 2006 -0600
    34.3 @@ -93,7 +93,7 @@ static int netback_probe(struct xenbus_d
    34.4  	if (err)
    34.5  		goto fail;
    34.6  
    34.7 -	err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
    34.8 +	err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
    34.9  	if (err) {
   34.10  		goto fail;
   34.11  	}
   34.12 @@ -122,7 +122,7 @@ static int netback_hotplug(struct xenbus
   34.13  
   34.14  	DPRINTK("netback_hotplug");
   34.15  
   34.16 -	val = xenbus_read(NULL, xdev->nodename, "script", NULL);
   34.17 +	val = xenbus_read(XBT_NULL, xdev->nodename, "script", NULL);
   34.18  	if (IS_ERR(val)) {
   34.19  		int err = PTR_ERR(val);
   34.20  		xenbus_dev_fatal(xdev, err, "reading script");
   34.21 @@ -160,7 +160,7 @@ static void backend_changed(struct xenbu
   34.22  
   34.23  	DPRINTK("");
   34.24  
   34.25 -	err = xenbus_scanf(NULL, dev->nodename, "handle", "%li", &handle);
   34.26 +	err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%li", &handle);
   34.27  	if (XENBUS_EXIST_ERR(err)) {
   34.28  		/* Since this watch will fire once immediately after it is
   34.29  		   registered, we expect this.  Ignore it, and wait for the
   34.30 @@ -212,7 +212,7 @@ static void frontend_changed(struct xenb
   34.31  		break;
   34.32  
   34.33  	case XenbusStateClosing:
   34.34 -		xenbus_switch_state(dev, NULL, XenbusStateClosing);
   34.35 +		xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
   34.36  		break;
   34.37  
   34.38  	case XenbusStateClosed:
   34.39 @@ -256,7 +256,7 @@ static void connect(struct backend_info 
   34.40  		return;
   34.41  	}
   34.42  
   34.43 -	xenbus_switch_state(dev, NULL, XenbusStateConnected);
   34.44 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateConnected);
   34.45  }
   34.46  
   34.47  
   34.48 @@ -269,7 +269,7 @@ static int connect_rings(struct backend_
   34.49  
   34.50  	DPRINTK("");
   34.51  
   34.52 -	err = xenbus_gather(NULL, dev->otherend,
   34.53 +	err = xenbus_gather(XBT_NULL, dev->otherend,
   34.54  			    "tx-ring-ref", "%lu", &tx_ring_ref,
   34.55  			    "rx-ring-ref", "%lu", &rx_ring_ref,
   34.56  			    "event-channel", "%u", &evtchn, NULL);
    35.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Jan 13 10:38:44 2006 -0600
    35.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Jan 13 14:12:24 2006 -0600
    35.3 @@ -212,7 +212,7 @@ static int netfront_probe(struct xenbus_
    35.4  	struct netfront_info *info;
    35.5  	unsigned int handle;
    35.6  
    35.7 -	err = xenbus_scanf(NULL, dev->nodename, "handle", "%u", &handle);
    35.8 +	err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%u", &handle);
    35.9  	if (err != 1) {
   35.10  		xenbus_dev_fatal(dev, err, "reading handle");
   35.11  		return err;
   35.12 @@ -260,7 +260,7 @@ static int talk_to_backend(struct xenbus
   35.13  			   struct netfront_info *info)
   35.14  {
   35.15  	const char *message;
   35.16 -	struct xenbus_transaction *xbt;
   35.17 +	xenbus_transaction_t xbt;
   35.18  	int err;
   35.19  
   35.20  	err = xen_net_read_mac(dev, info->mac);
   35.21 @@ -275,8 +275,8 @@ static int talk_to_backend(struct xenbus
   35.22  		goto out;
   35.23  
   35.24  again:
   35.25 -	xbt = xenbus_transaction_start();
   35.26 -	if (IS_ERR(xbt)) {
   35.27 +	err = xenbus_transaction_start(&xbt);
   35.28 +	if (err) {
   35.29  		xenbus_dev_fatal(dev, err, "starting transaction");
   35.30  		goto destroy_ring;
   35.31  	}
   35.32 @@ -1199,7 +1199,7 @@ static void netfront_closing(struct xenb
   35.33  
   35.34  	close_netdev(info);
   35.35  
   35.36 -	xenbus_switch_state(dev, NULL, XenbusStateClosed);
   35.37 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
   35.38  }
   35.39  
   35.40  
   35.41 @@ -1218,22 +1218,14 @@ static int netfront_remove(struct xenbus
   35.42  
   35.43  static void close_netdev(struct netfront_info *info)
   35.44  {
   35.45 -	/* Stop old i/f to prevent errors whilst we rebuild the state. */
   35.46 -	spin_lock_irq(&info->tx_lock);
   35.47 -	spin_lock(&info->rx_lock);
   35.48 +	spin_lock_irq(&info->netdev->xmit_lock);
   35.49  	netif_stop_queue(info->netdev);
   35.50 -	/* info->backend_state = BEST_DISCONNECTED; */
   35.51 -	spin_unlock(&info->rx_lock);
   35.52 -	spin_unlock_irq(&info->tx_lock);
   35.53 +	spin_unlock_irq(&info->netdev->xmit_lock);
   35.54  
   35.55  #ifdef CONFIG_PROC_FS
   35.56  	xennet_proc_delif(info->netdev);
   35.57  #endif
   35.58  
   35.59 -	if (info->irq)
   35.60 -		unbind_from_irqhandler(info->irq, info->netdev);
   35.61 -	info->evtchn = info->irq = 0;
   35.62 -
   35.63  	del_timer_sync(&info->rx_refill_timer);
   35.64  
   35.65  	unregister_netdev(info->netdev);
   35.66 @@ -1242,6 +1234,17 @@ static void close_netdev(struct netfront
   35.67  
   35.68  static void netif_disconnect_backend(struct netfront_info *info)
   35.69  {
   35.70 +	/* Stop old i/f to prevent errors whilst we rebuild the state. */
   35.71 +	spin_lock_irq(&info->tx_lock);
   35.72 +	spin_lock(&info->rx_lock);
   35.73 +	info->backend_state = BEST_DISCONNECTED;
   35.74 +	spin_unlock(&info->rx_lock);
   35.75 +	spin_unlock_irq(&info->tx_lock);
   35.76 +
   35.77 +	if (info->irq)
   35.78 +		unbind_from_irqhandler(info->irq, info->netdev);
   35.79 +	info->evtchn = info->irq = 0;
   35.80 +
   35.81  	end_access(info->tx_ring_ref, info->tx.sring);
   35.82  	end_access(info->rx_ring_ref, info->rx.sring);
   35.83  	info->tx_ring_ref = GRANT_INVALID_REF;
    36.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Fri Jan 13 10:38:44 2006 -0600
    36.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Fri Jan 13 14:12:24 2006 -0600
    36.3 @@ -89,7 +89,7 @@ static int tpmback_probe(struct xenbus_d
    36.4  		goto fail;
    36.5  	}
    36.6  
    36.7 -	err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
    36.8 +	err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
    36.9  	if (err) {
   36.10  		goto fail;
   36.11  	}
   36.12 @@ -109,7 +109,7 @@ static void backend_changed(struct xenbu
   36.13  		= container_of(watch, struct backend_info, backend_watch);
   36.14  	struct xenbus_device *dev = be->dev;
   36.15  
   36.16 -	err = xenbus_scanf(NULL, dev->nodename,
   36.17 +	err = xenbus_scanf(XBT_NULL, dev->nodename,
   36.18  	                   "instance","%li", &instance);
   36.19  	if (XENBUS_EXIST_ERR(err)) {
   36.20  		return;
   36.21 @@ -177,7 +177,7 @@ static void frontend_changed(struct xenb
   36.22  		break;
   36.23  
   36.24  	case XenbusStateClosing:
   36.25 -		xenbus_switch_state(dev, NULL, XenbusStateClosing);
   36.26 +		xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
   36.27  		break;
   36.28  
   36.29  	case XenbusStateClosed:
   36.30 @@ -230,15 +230,14 @@ static void maybe_connect(struct backend
   36.31  
   36.32  static void connect(struct backend_info *be)
   36.33  {
   36.34 -	struct xenbus_transaction *xbt;
   36.35 +	xenbus_transaction_t xbt;
   36.36  	int err;
   36.37  	struct xenbus_device *dev = be->dev;
   36.38  	unsigned long ready = 1;
   36.39  
   36.40  again:
   36.41 -	xbt = xenbus_transaction_start();
   36.42 -	if (IS_ERR(xbt)) {
   36.43 -		err = PTR_ERR(xbt);
   36.44 +	err = xenbus_transaction_start(&xbt);
   36.45 +	if (err) {
   36.46  		xenbus_dev_fatal(be->dev, err, "starting transaction");
   36.47  		return;
   36.48  	}
   36.49 @@ -275,7 +274,7 @@ static int connect_ring(struct backend_i
   36.50  	unsigned int evtchn;
   36.51  	int err;
   36.52  
   36.53 -	err = xenbus_gather(NULL, dev->otherend,
   36.54 +	err = xenbus_gather(XBT_NULL, dev->otherend,
   36.55  	                    "ring-ref", "%lu", &ring_ref,
   36.56  			    "event-channel", "%u", &evtchn, NULL);
   36.57  	if (err) {
    37.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c	Fri Jan 13 10:38:44 2006 -0600
    37.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c	Fri Jan 13 14:12:24 2006 -0600
    37.3 @@ -271,7 +271,7 @@ static int talk_to_backend(struct xenbus
    37.4  {
    37.5  	const char *message = NULL;
    37.6  	int err;
    37.7 -	struct xenbus_transaction *xbt;
    37.8 +	xenbus_transaction_t xbt;
    37.9  
   37.10  	err = setup_tpmring(dev, info);
   37.11  	if (err) {
   37.12 @@ -280,8 +280,8 @@ static int talk_to_backend(struct xenbus
   37.13  	}
   37.14  
   37.15  again:
   37.16 -	xbt = xenbus_transaction_start();
   37.17 -	if (IS_ERR(xbt)) {
   37.18 +	err = xenbus_transaction_start(&xbt);
   37.19 +	if (err) {
   37.20  		xenbus_dev_fatal(dev, err, "starting transaction");
   37.21  		goto destroy_tpmring;
   37.22  	}
    38.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri Jan 13 10:38:44 2006 -0600
    38.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri Jan 13 14:12:24 2006 -0600
    38.3 @@ -87,7 +87,7 @@ EXPORT_SYMBOL(xenbus_watch_path2);
    38.4  
    38.5  
    38.6  int xenbus_switch_state(struct xenbus_device *dev,
    38.7 -			struct xenbus_transaction *xbt,
    38.8 +			xenbus_transaction_t xbt,
    38.9  			XenbusState state)
   38.10  {
   38.11  	/* We check whether the state is currently set to the given value, and
   38.12 @@ -152,7 +152,7 @@ void _dev_error(struct xenbus_device *de
   38.13  		goto fail;
   38.14  	}
   38.15  
   38.16 -	if (xenbus_write(NULL, path_buffer, "error", printf_buffer) != 0) {
   38.17 +	if (xenbus_write(XBT_NULL, path_buffer, "error", printf_buffer) != 0) {
   38.18  		printk("xenbus: failed to write error node for %s (%s)\n",
   38.19  		       dev->nodename, printf_buffer);
   38.20  		goto fail;
   38.21 @@ -187,7 +187,7 @@ void xenbus_dev_fatal(struct xenbus_devi
   38.22  	_dev_error(dev, err, fmt, ap);
   38.23  	va_end(ap);
   38.24  	
   38.25 -	xenbus_switch_state(dev, NULL, XenbusStateClosing);
   38.26 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
   38.27  }
   38.28  EXPORT_SYMBOL(xenbus_dev_fatal);
   38.29  
   38.30 @@ -223,7 +223,7 @@ XenbusState xenbus_read_driver_state(con
   38.31  {
   38.32  	XenbusState result;
   38.33  
   38.34 -	int err = xenbus_gather(NULL, path, "state", "%d", &result, NULL);
   38.35 +	int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
   38.36  	if (err)
   38.37  		result = XenbusStateClosed;
   38.38  
    39.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Fri Jan 13 10:38:44 2006 -0600
    39.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Fri Jan 13 14:12:24 2006 -0600
    39.3 @@ -47,7 +47,7 @@
    39.4  
    39.5  struct xenbus_dev_transaction {
    39.6  	struct list_head list;
    39.7 -	struct xenbus_transaction *handle;
    39.8 +	xenbus_transaction_t handle;
    39.9  };
   39.10  
   39.11  struct xenbus_dev_data {
   39.12 @@ -109,9 +109,8 @@ static ssize_t xenbus_dev_write(struct f
   39.13  				size_t len, loff_t *ppos)
   39.14  {
   39.15  	struct xenbus_dev_data *u = filp->private_data;
   39.16 -	struct xenbus_dev_transaction *trans;
   39.17 +	struct xenbus_dev_transaction *trans = NULL;
   39.18  	void *reply;
   39.19 -	int err = 0;
   39.20  
   39.21  	if ((len + u->len) > sizeof(u->u.buffer))
   39.22  		return -EINVAL;
   39.23 @@ -135,42 +134,40 @@ static ssize_t xenbus_dev_write(struct f
   39.24  	case XS_MKDIR:
   39.25  	case XS_RM:
   39.26  	case XS_SET_PERMS:
   39.27 +		if (u->u.msg.type == XS_TRANSACTION_START) {
   39.28 +			trans = kmalloc(sizeof(*trans), GFP_KERNEL);
   39.29 +			if (!trans)
   39.30 +				return -ENOMEM;
   39.31 +		}
   39.32 +
   39.33  		reply = xenbus_dev_request_and_reply(&u->u.msg);
   39.34  		if (IS_ERR(reply)) {
   39.35 -			err = PTR_ERR(reply);
   39.36 -		} else {
   39.37 -			if (u->u.msg.type == XS_TRANSACTION_START) {
   39.38 -				trans = kmalloc(sizeof(*trans), GFP_KERNEL);
   39.39 -				trans->handle = (struct xenbus_transaction *)
   39.40 -					simple_strtoul(reply, NULL, 0);
   39.41 -				list_add(&trans->list, &u->transactions);
   39.42 -			} else if (u->u.msg.type == XS_TRANSACTION_END) {
   39.43 -				list_for_each_entry(trans, &u->transactions,
   39.44 -						    list)
   39.45 -					if ((unsigned long)trans->handle ==
   39.46 -					    (unsigned long)u->u.msg.tx_id)
   39.47 -						break;
   39.48 -				BUG_ON(&trans->list == &u->transactions);
   39.49 -				list_del(&trans->list);
   39.50 -				kfree(trans);
   39.51 -			}
   39.52 -			queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
   39.53 -			queue_reply(u, (char *)reply, u->u.msg.len);
   39.54 -			kfree(reply);
   39.55 +			kfree(trans);
   39.56 +			return PTR_ERR(reply);
   39.57  		}
   39.58 +
   39.59 +		if (u->u.msg.type == XS_TRANSACTION_START) {
   39.60 +			trans->handle = simple_strtoul(reply, NULL, 0);
   39.61 +			list_add(&trans->list, &u->transactions);
   39.62 +		} else if (u->u.msg.type == XS_TRANSACTION_END) {
   39.63 +			list_for_each_entry(trans, &u->transactions, list)
   39.64 +				if (trans->handle == u->u.msg.tx_id)
   39.65 +					break;
   39.66 +			BUG_ON(&trans->list == &u->transactions);
   39.67 +			list_del(&trans->list);
   39.68 +			kfree(trans);
   39.69 +		}
   39.70 +		queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
   39.71 +		queue_reply(u, (char *)reply, u->u.msg.len);
   39.72 +		kfree(reply);
   39.73  		break;
   39.74  
   39.75  	default:
   39.76 -		err = -EINVAL;
   39.77 -		break;
   39.78 +		return -EINVAL;
   39.79  	}
   39.80  
   39.81 -	if (err == 0) {
   39.82 -		u->len = 0;
   39.83 -		err = len;
   39.84 -	}
   39.85 -
   39.86 -	return err;
   39.87 +	u->len = 0;
   39.88 +	return len;
   39.89  }
   39.90  
   39.91  static int xenbus_dev_open(struct inode *inode, struct file *filp)
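
Besides flattening xenbus_dev_write() with early returns, this rework fixes a real bug: the old code called kmalloc() for the transaction record only after the request had been sent, and never checked the result, so a failed allocation would have been dereferenced. The new ordering allocates (and checks) up front, before the step that cannot be rolled back, and frees the record if the request itself fails. A sketch of that ordering, with hypothetical names (struct xs_msg, send_request):

    struct record { unsigned long handle; };
    struct xs_msg;
    extern void *send_request(struct xs_msg *msg);

    static int do_msg(struct xs_msg *msg, int needs_record)
    {
            struct record *rec = NULL;
            void *reply;

            if (needs_record) {
                    rec = kmalloc(sizeof(*rec), GFP_KERNEL);
                    if (!rec)
                            return -ENOMEM;  /* nothing sent yet, nothing to undo */
            }

            reply = send_request(msg);       /* the step that cannot be undone */
            if (IS_ERR(reply)) {
                    kfree(rec);              /* safe: kfree(NULL) is a no-op */
                    return PTR_ERR(reply);
            }
            /* ... on success, record rec in a list and queue the reply ... */
            kfree(reply);
            return 0;
    }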
    40.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Jan 13 10:38:44 2006 -0600
    40.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Jan 13 14:12:24 2006 -0600
    40.3 @@ -115,7 +115,7 @@ static int frontend_bus_id(char bus_id[B
    40.4  static int read_otherend_details(struct xenbus_device *xendev,
    40.5  				 char *id_node, char *path_node)
    40.6  {
    40.7 -	int err = xenbus_gather(NULL, xendev->nodename,
    40.8 +	int err = xenbus_gather(XBT_NULL, xendev->nodename,
    40.9  				id_node, "%i", &xendev->otherend_id,
   40.10  				path_node, NULL, &xendev->otherend,
   40.11  				NULL);
   40.12 @@ -126,7 +126,7 @@ static int read_otherend_details(struct 
   40.13  		return err;
   40.14  	}
   40.15  	if (strlen(xendev->otherend) == 0 ||
   40.16 -	    !xenbus_exists(NULL, xendev->otherend, "")) {
   40.17 +	    !xenbus_exists(XBT_NULL, xendev->otherend, "")) {
   40.18  		xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
   40.19  				 xendev->nodename);
   40.20  		kfree(xendev->otherend);
   40.21 @@ -200,14 +200,14 @@ static int backend_bus_id(char bus_id[BU
   40.22  
   40.23  	devid = strrchr(nodename, '/') + 1;
   40.24  
   40.25 -	err = xenbus_gather(NULL, nodename, "frontend-id", "%i", &domid,
   40.26 +	err = xenbus_gather(XBT_NULL, nodename, "frontend-id", "%i", &domid,
   40.27  			    "frontend", NULL, &frontend,
   40.28  			    NULL);
   40.29  	if (err)
   40.30  		return err;
   40.31  	if (strlen(frontend) == 0)
   40.32  		err = -ERANGE;
   40.33 -	if (!err && !xenbus_exists(NULL, frontend, ""))
   40.34 +	if (!err && !xenbus_exists(XBT_NULL, frontend, ""))
   40.35  		err = -ENOENT;
   40.36  
   40.37  	kfree(frontend);
   40.38 @@ -373,7 +373,7 @@ static int xenbus_dev_probe(struct devic
   40.39  	return 0;
   40.40  fail:
   40.41  	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
   40.42 -	xenbus_switch_state(dev, NULL, XenbusStateClosed);
   40.43 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
   40.44  	return -ENODEV;
   40.45  	
   40.46  }
   40.47 @@ -391,7 +391,7 @@ static int xenbus_dev_remove(struct devi
   40.48  	if (drv->remove)
   40.49  		drv->remove(dev);
   40.50  
   40.51 -	xenbus_switch_state(dev, NULL, XenbusStateClosed);
   40.52 +	xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
   40.53  	return 0;
   40.54  }
   40.55  
   40.56 @@ -470,12 +470,17 @@ static int cleanup_dev(struct device *de
   40.57  
   40.58  	DPRINTK("%s", info->nodename);
   40.59  
   40.60 -	if (!strncmp(xendev->nodename, info->nodename, len)) {
   40.61 -		info->dev = xendev;
   40.62 -		get_device(dev);
   40.63 -		return 1;
   40.64 -	}
   40.65 -	return 0;
   40.66 +	/* Match the info->nodename path, or any subdirectory of that path. */
   40.67 +	if (strncmp(xendev->nodename, info->nodename, len))
   40.68 +		return 0;
   40.69 +
   40.70 +	/* If the node name is longer, ensure it really is a subdirectory. */
   40.71 +	if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
   40.72 +		return 0;
   40.73 +
   40.74 +	info->dev = xendev;
   40.75 +	get_device(dev);
   40.76 +	return 1;
   40.77  }
   40.78  
   40.79  static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
   40.80 @@ -542,14 +547,6 @@ static int xenbus_probe_node(struct xen_
   40.81  			     const char *type,
   40.82  			     const char *nodename)
   40.83  {
   40.84 -#define CHECK_FAIL				\
   40.85 -	do {					\
   40.86 -		if (err)			\
   40.87 -			goto fail;		\
   40.88 -	}					\
   40.89 -	while (0)				\
   40.90 -
   40.91 -
   40.92  	int err;
   40.93  	struct xenbus_device *xendev;
   40.94  	size_t stringlen;
   40.95 @@ -584,19 +581,18 @@ static int xenbus_probe_node(struct xen_
   40.96  	xendev->dev.release = xenbus_dev_release;
   40.97  
   40.98  	err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
   40.99 -	CHECK_FAIL;
  40.100 +	if (err)
  40.101 +		goto fail;
  40.102  
  40.103  	/* Register with generic device framework. */
  40.104  	err = device_register(&xendev->dev);
  40.105 -	CHECK_FAIL;
  40.106 +	if (err)
  40.107 +		goto fail;
  40.108  
  40.109  	device_create_file(&xendev->dev, &dev_attr_nodename);
  40.110  	device_create_file(&xendev->dev, &dev_attr_devtype);
  40.111  
  40.112  	return 0;
  40.113 -
  40.114 -#undef CHECK_FAIL
  40.115 -
  40.116  fail:
  40.117  	xenbus_dev_free(xendev);
  40.118  	return err;
  40.119 @@ -652,7 +648,7 @@ static int xenbus_probe_backend(const ch
  40.120  	if (!nodename)
  40.121  		return -ENOMEM;
  40.122  
  40.123 -	dir = xenbus_directory(NULL, nodename, "", &dir_n);
  40.124 +	dir = xenbus_directory(XBT_NULL, nodename, "", &dir_n);
  40.125  	if (IS_ERR(dir)) {
  40.126  		kfree(nodename);
  40.127  		return PTR_ERR(dir);
  40.128 @@ -675,7 +671,7 @@ static int xenbus_probe_device_type(stru
  40.129  	unsigned int dir_n = 0;
  40.130  	int i;
  40.131  
  40.132 -	dir = xenbus_directory(NULL, bus->root, type, &dir_n);
  40.133 +	dir = xenbus_directory(XBT_NULL, bus->root, type, &dir_n);
  40.134  	if (IS_ERR(dir))
  40.135  		return PTR_ERR(dir);
  40.136  
  40.137 @@ -694,7 +690,7 @@ static int xenbus_probe_devices(struct x
  40.138  	char **dir;
  40.139  	unsigned int i, dir_n;
  40.140  
  40.141 -	dir = xenbus_directory(NULL, bus->root, "", &dir_n);
  40.142 +	dir = xenbus_directory(XBT_NULL, bus->root, "", &dir_n);
  40.143  	if (IS_ERR(dir))
  40.144  		return PTR_ERR(dir);
  40.145  
  40.146 @@ -740,7 +736,7 @@ static void dev_changed(const char *node
  40.147  	if (char_count(node, '/') < 2)
  40.148   		return;
  40.149  
  40.150 -	exists = xenbus_exists(NULL, node, "");
  40.151 +	exists = xenbus_exists(XBT_NULL, node, "");
  40.152  	if (!exists) {
  40.153  		xenbus_cleanup_devices(node, &bus->bus);
  40.154  		return;
    41.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Fri Jan 13 10:38:44 2006 -0600
    41.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Fri Jan 13 14:12:24 2006 -0600
    41.3 @@ -190,7 +190,7 @@ void *xenbus_dev_request_and_reply(struc
    41.4  }
    41.5  
    41.6  /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
    41.7 -static void *xs_talkv(struct xenbus_transaction *t,
    41.8 +static void *xs_talkv(xenbus_transaction_t t,
    41.9  		      enum xsd_sockmsg_type type,
   41.10  		      const struct kvec *iovec,
   41.11  		      unsigned int num_vecs,
   41.12 @@ -201,7 +201,7 @@ static void *xs_talkv(struct xenbus_tran
   41.13  	unsigned int i;
   41.14  	int err;
   41.15  
   41.16 -	msg.tx_id = (u32)(unsigned long)t;
   41.17 +	msg.tx_id = t;
   41.18  	msg.req_id = 0;
   41.19  	msg.type = type;
   41.20  	msg.len = 0;
   41.21 @@ -242,7 +242,7 @@ static void *xs_talkv(struct xenbus_tran
   41.22  }
   41.23  
   41.24  /* Simplified version of xs_talkv: single message. */
   41.25 -static void *xs_single(struct xenbus_transaction *t,
   41.26 +static void *xs_single(xenbus_transaction_t t,
   41.27  		       enum xsd_sockmsg_type type,
   41.28  		       const char *string,
   41.29  		       unsigned int *len)
   41.30 @@ -309,7 +309,7 @@ static char **split(char *strings, unsig
   41.31  	return ret;
   41.32  }
   41.33  
   41.34 -char **xenbus_directory(struct xenbus_transaction *t,
   41.35 +char **xenbus_directory(xenbus_transaction_t t,
   41.36  			const char *dir, const char *node, unsigned int *num)
   41.37  {
   41.38  	char *strings, *path;
   41.39 @@ -329,7 +329,7 @@ char **xenbus_directory(struct xenbus_tr
   41.40  EXPORT_SYMBOL(xenbus_directory);
   41.41  
   41.42  /* Check if a path exists. Return 1 if it does. */
   41.43 -int xenbus_exists(struct xenbus_transaction *t,
   41.44 +int xenbus_exists(xenbus_transaction_t t,
   41.45  		  const char *dir, const char *node)
   41.46  {
   41.47  	char **d;
   41.48 @@ -347,7 +347,7 @@ EXPORT_SYMBOL(xenbus_exists);
   41.49   * Returns a kmalloced value: call free() on it after use.
   41.50   * len indicates length in bytes.
   41.51   */
   41.52 -void *xenbus_read(struct xenbus_transaction *t,
   41.53 +void *xenbus_read(xenbus_transaction_t t,
   41.54  		  const char *dir, const char *node, unsigned int *len)
   41.55  {
   41.56  	char *path;
   41.57 @@ -366,7 +366,7 @@ EXPORT_SYMBOL(xenbus_read);
   41.58  /* Write the value of a single file.
   41.59   * Returns -err on failure.
   41.60   */
   41.61 -int xenbus_write(struct xenbus_transaction *t,
   41.62 +int xenbus_write(xenbus_transaction_t t,
   41.63  		 const char *dir, const char *node, const char *string)
   41.64  {
   41.65  	const char *path;
   41.66 @@ -389,7 +389,7 @@ int xenbus_write(struct xenbus_transacti
   41.67  EXPORT_SYMBOL(xenbus_write);
   41.68  
   41.69  /* Create a new directory. */
   41.70 -int xenbus_mkdir(struct xenbus_transaction *t,
   41.71 +int xenbus_mkdir(xenbus_transaction_t t,
   41.72  		 const char *dir, const char *node)
   41.73  {
   41.74  	char *path;
   41.75 @@ -406,7 +406,7 @@ int xenbus_mkdir(struct xenbus_transacti
   41.76  EXPORT_SYMBOL(xenbus_mkdir);
   41.77  
   41.78  /* Destroy a file or directory (directories must be empty). */
   41.79 -int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
   41.80 +int xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
   41.81  {
   41.82  	char *path;
   41.83  	int ret;
   41.84 @@ -424,30 +424,28 @@ EXPORT_SYMBOL(xenbus_rm);
   41.85  /* Start a transaction: changes by others will not be seen during this
   41.86   * transaction, and changes will not be visible to others until end.
   41.87   */
   41.88 -struct xenbus_transaction *xenbus_transaction_start(void)
   41.89 +int xenbus_transaction_start(xenbus_transaction_t *t)
   41.90  {
   41.91  	char *id_str;
   41.92 -	unsigned long id;
   41.93  
   41.94  	down_read(&xs_state.suspend_mutex);
   41.95  
   41.96 -	id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL);
   41.97 +	id_str = xs_single(XBT_NULL, XS_TRANSACTION_START, "", NULL);
   41.98  	if (IS_ERR(id_str)) {
   41.99  		up_read(&xs_state.suspend_mutex);
  41.100 -		return (struct xenbus_transaction *)id_str;
  41.101 +		return PTR_ERR(id_str);
  41.102  	}
  41.103  
  41.104 -	id = simple_strtoul(id_str, NULL, 0);
  41.105 +	*t = simple_strtoul(id_str, NULL, 0);
  41.106  	kfree(id_str);
  41.107 -
  41.108 -	return (struct xenbus_transaction *)id;
  41.109 +	return 0;
  41.110  }
  41.111  EXPORT_SYMBOL(xenbus_transaction_start);
  41.112  
  41.113  /* End a transaction.
  41.114   * If abandon is true, transaction is discarded instead of committed.
  41.115   */
  41.116 -int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
  41.117 +int xenbus_transaction_end(xenbus_transaction_t t, int abort)
  41.118  {
  41.119  	char abortstr[2];
  41.120  	int err;
  41.121 @@ -466,7 +464,7 @@ int xenbus_transaction_end(struct xenbus
  41.122  EXPORT_SYMBOL(xenbus_transaction_end);
  41.123  
  41.124  /* Single read and scanf: returns -errno or num scanned. */
  41.125 -int xenbus_scanf(struct xenbus_transaction *t,
  41.126 +int xenbus_scanf(xenbus_transaction_t t,
  41.127  		 const char *dir, const char *node, const char *fmt, ...)
  41.128  {
  41.129  	va_list ap;
  41.130 @@ -489,7 +487,7 @@ int xenbus_scanf(struct xenbus_transacti
  41.131  EXPORT_SYMBOL(xenbus_scanf);
  41.132  
  41.133  /* Single printf and write: returns -errno or 0. */
  41.134 -int xenbus_printf(struct xenbus_transaction *t,
  41.135 +int xenbus_printf(xenbus_transaction_t t,
  41.136  		  const char *dir, const char *node, const char *fmt, ...)
  41.137  {
  41.138  	va_list ap;
  41.139 @@ -515,7 +513,7 @@ int xenbus_printf(struct xenbus_transact
  41.140  EXPORT_SYMBOL(xenbus_printf);
  41.141  
  41.142  /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
  41.143 -int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
  41.144 +int xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
  41.145  {
  41.146  	va_list ap;
  41.147  	const char *name;
  41.148 @@ -553,7 +551,7 @@ static int xs_watch(const char *path, co
  41.149  	iov[1].iov_base = (void *)token;
  41.150  	iov[1].iov_len = strlen(token) + 1;
  41.151  
  41.152 -	return xs_error(xs_talkv(NULL, XS_WATCH, iov,
  41.153 +	return xs_error(xs_talkv(XBT_NULL, XS_WATCH, iov,
  41.154  				 ARRAY_SIZE(iov), NULL));
  41.155  }
  41.156  
  41.157 @@ -566,7 +564,7 @@ static int xs_unwatch(const char *path, 
  41.158  	iov[1].iov_base = (char *)token;
  41.159  	iov[1].iov_len = strlen(token) + 1;
  41.160  
  41.161 -	return xs_error(xs_talkv(NULL, XS_UNWATCH, iov,
  41.162 +	return xs_error(xs_talkv(XBT_NULL, XS_UNWATCH, iov,
  41.163  				 ARRAY_SIZE(iov), NULL));
  41.164  }
  41.165  
    42.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Fri Jan 13 10:38:44 2006 -0600
    42.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Fri Jan 13 14:12:24 2006 -0600
    42.3 @@ -32,6 +32,7 @@
    42.4  
    42.5  #include <asm-xen/xen-public/xen.h>
    42.6  #include <asm-xen/xen-public/sched.h>
    42.7 +#include <asm-xen/xen-public/nmi.h>
    42.8  
    42.9  #define _hypercall0(type, name)			\
   42.10  ({						\
   42.11 @@ -300,6 +301,14 @@ HYPERVISOR_suspend(
   42.12  			   SHUTDOWN_suspend, srec);
   42.13  }
   42.14  
   42.15 +static inline int
   42.16 +HYPERVISOR_nmi_op(
   42.17 +	unsigned long op,
   42.18 +	unsigned long arg)
   42.19 +{
   42.20 +	return _hypercall2(int, nmi_op, op, arg);
   42.21 +}
   42.22 +
   42.23  #endif /* __HYPERCALL_H__ */
   42.24  
   42.25  /*
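
HYPERVISOR_nmi_op() is the new two-argument wrapper for the nmi_op hypercall declared in xen-public/nmi.h; the setup_arch_post.h hunks below use it with XENNMI_register_callback to hand the hypervisor the kernel's low-level nmi entry stub. A hedged usage sketch; XENNMI_unregister_callback is assumed from that header, since only the register call appears in this changeset:

    extern void nmi(void);  /* low-level NMI entry stub in entry.S */

    static void xen_nmi_init(void)
    {
            HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
    }

    static void xen_nmi_exit(void)
    {
            /* assumed subcommand; not exercised in this changeset */
            HYPERVISOR_nmi_op(XENNMI_unregister_callback, 0);
    }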
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h	Fri Jan 13 14:12:24 2006 -0600
    43.3 @@ -0,0 +1,33 @@
    43.4 +/*
    43.5 + *  include/asm-xen/asm-i386/mach-xen/mach_traps.h
    43.6 + *
    43.7 + *  Machine specific NMI handling for Xen
    43.8 + */
    43.9 +#ifndef _MACH_TRAPS_H
   43.10 +#define _MACH_TRAPS_H
   43.11 +
   43.12 +#include <linux/bitops.h>
   43.13 +#include <asm-xen/xen-public/nmi.h>
   43.14 +
   43.15 +static inline void clear_mem_error(unsigned char reason) {}
   43.16 +static inline void clear_io_check_error(unsigned char reason) {}
   43.17 +
   43.18 +static inline unsigned char get_nmi_reason(void)
   43.19 +{
   43.20 +	shared_info_t *s = HYPERVISOR_shared_info;
   43.21 +	unsigned char reason = 0;
   43.22 +
   43.23 +	/* construct a value which looks like it came from
   43.24 +	 * port 0x61.
   43.25 +	 */
   43.26 +	if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
   43.27 +		reason |= 0x40;
   43.28 +	if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
   43.29 +		reason |= 0x80;
   43.30 +
   43.31 +	return reason;
   43.32 +}
   43.33 +
   43.34 +static inline void reassert_nmi(void) {}
   43.35 +
   43.36 +#endif /* !_MACH_TRAPS_H */
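
The 0x40 and 0x80 bits synthesized above mirror the layout of ISA system control port B (I/O port 0x61), where bit 6 reports an I/O channel check and bit 7 a memory parity error; faking the same register lets the generic i386 NMI code run unchanged under Xen. A small decode sketch of that assumed layout:

    /* Decode the synthetic "port 0x61" value returned by get_nmi_reason().
     * Bit meanings follow the ISA system control port B convention. */
    static const char *nmi_reason_str(unsigned char reason)
    {
            if (reason & 0x80)
                    return "memory parity error";
            if (reason & 0x40)
                    return "I/O channel check";
            return "unknown";
    }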
    44.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h	Fri Jan 13 10:38:44 2006 -0600
    44.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h	Fri Jan 13 14:12:24 2006 -0600
    44.3 @@ -29,6 +29,7 @@ void __init machine_specific_modify_cpu_
    44.4  
    44.5  extern void hypervisor_callback(void);
    44.6  extern void failsafe_callback(void);
    44.7 +extern void nmi(void);
    44.8  
    44.9  static void __init machine_specific_arch_setup(void)
   44.10  {
   44.11 @@ -36,5 +37,7 @@ static void __init machine_specific_arch
   44.12  	    __KERNEL_CS, (unsigned long)hypervisor_callback,
   44.13  	    __KERNEL_CS, (unsigned long)failsafe_callback);
   44.14  
   44.15 +	HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
   44.16 +
   44.17  	machine_specific_modify_cpu_capabilities(&boot_cpu_data);
   44.18  }
    45.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h	Fri Jan 13 10:38:44 2006 -0600
    45.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h	Fri Jan 13 14:12:24 2006 -0600
    45.3 @@ -287,9 +287,9 @@ HYPERVISOR_vcpu_op(
    45.4  }
    45.5  
    45.6  static inline int
    45.7 -HYPERVISOR_switch_to_user(void)
    45.8 +HYPERVISOR_iret(void)
    45.9  {
   45.10 -	return _hypercall0(int, switch_to_user);
   45.11 +	return _hypercall0(int, iret);
   45.12  }
   45.13  
   45.14  static inline int
   45.15 @@ -307,6 +307,14 @@ HYPERVISOR_suspend(
   45.16  			   SHUTDOWN_suspend, srec);
   45.17  }
   45.18  
   45.19 +static inline int
   45.20 +HYPERVISOR_nmi_op(
   45.21 +	unsigned long op,
   45.22 +	unsigned long arg)
   45.23 +{
   45.24 +	return _hypercall2(int, nmi_op, op, arg);
   45.25 +}
   45.26 +
   45.27  #endif /* __HYPERCALL_H__ */
   45.28  
   45.29  /*
    46.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h	Fri Jan 13 10:38:44 2006 -0600
    46.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h	Fri Jan 13 14:12:24 2006 -0600
    46.3 @@ -35,6 +35,7 @@ void __init machine_specific_modify_cpu_
    46.4  
    46.5  extern void hypervisor_callback(void);
    46.6  extern void failsafe_callback(void);
    46.7 +extern void nmi(void);
    46.8  
    46.9  static void __init machine_specific_arch_setup(void)
   46.10  {
   46.11 @@ -43,5 +44,9 @@ static void __init machine_specific_arch
   46.12                  (unsigned long) failsafe_callback,
   46.13                  (unsigned long) system_call);
   46.14  
   46.15 +#ifdef CONFIG_X86_LOCAL_APIC
   46.16 +	HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
   46.17 +#endif
   46.18 +
   46.19  	machine_specific_modify_cpu_capabilities(&boot_cpu_data);
   46.20  }
    47.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h	Fri Jan 13 14:12:24 2006 -0600
    47.3 @@ -0,0 +1,75 @@
    47.4 +/*
    47.5 + *  linux/include/asm-i386/nmi.h
    47.6 + */
    47.7 +#ifndef ASM_NMI_H
    47.8 +#define ASM_NMI_H
    47.9 +
   47.10 +#include <linux/pm.h>
   47.11 +
   47.12 +#include <asm-xen/xen-public/nmi.h>
   47.13 +
   47.14 +struct pt_regs;
   47.15 + 
   47.16 +typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
   47.17 + 
   47.18 +/** 
   47.19 + * set_nmi_callback
   47.20 + *
   47.21 + * Set a handler for an NMI. Only one handler may be
   47.22 + * set. The handler should return 1 if it handled the NMI.
   47.23 + */
   47.24 +void set_nmi_callback(nmi_callback_t callback);
   47.25 + 
   47.26 +/** 
   47.27 + * unset_nmi_callback
   47.28 + *
   47.29 + * Remove the handler previously set.
   47.30 + */
   47.31 +void unset_nmi_callback(void);
   47.32 + 
   47.33 +#ifdef CONFIG_PM
   47.34 + 
   47.35 +/** Replace the PM callback routine for NMI. */
   47.36 +struct pm_dev * set_nmi_pm_callback(pm_callback callback);
   47.37 +
   47.38 +/** Unset the PM callback routine back to the default. */
   47.39 +void unset_nmi_pm_callback(struct pm_dev * dev);
   47.40 +
   47.41 +#else
   47.42 +
   47.43 +static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
   47.44 +{
   47.45 +	return 0;
   47.46 +} 
   47.47 + 
   47.48 +static inline void unset_nmi_pm_callback(struct pm_dev * dev)
   47.49 +{
   47.50 +}
   47.51 +
   47.52 +#endif /* CONFIG_PM */
   47.53 + 
   47.54 +extern void default_do_nmi(struct pt_regs *);
   47.55 +extern void die_nmi(char *str, struct pt_regs *regs);
   47.56 +
   47.57 +static inline unsigned char get_nmi_reason(void)
   47.58 +{
   47.59 +        shared_info_t *s = HYPERVISOR_shared_info;
   47.60 +        unsigned char reason = 0;
   47.61 +
   47.62 +        /* construct a value which looks like it came from
   47.63 +         * port 0x61.
   47.64 +         */
   47.65 +        if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
   47.66 +                reason |= 0x40;
   47.67 +        if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
   47.68 +                reason |= 0x80;
   47.69 +
   47.70 +        return reason;
   47.71 +}
   47.72 +
   47.73 +extern int panic_on_timeout;
   47.74 +extern int unknown_nmi_panic;
   47.75 +
   47.76 +extern int check_nmi_watchdog(void);
   47.77 + 
   47.78 +#endif /* ASM_NMI_H */
    48.1 --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h	Fri Jan 13 10:38:44 2006 -0600
    48.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h	Fri Jan 13 14:12:24 2006 -0600
    48.3 @@ -37,6 +37,8 @@
    48.4  #include <asm-xen/xen-public/io/xenbus.h>
    48.5  #include <asm-xen/xen-public/io/xs_wire.h>
    48.6  
    48.7 +#define XBT_NULL 0
    48.8 +
    48.9  /* Register callback to watch this node. */
   48.10  struct xenbus_watch
   48.11  {
   48.12 @@ -100,35 +102,35 @@ int xenbus_register_frontend(struct xenb
   48.13  int xenbus_register_backend(struct xenbus_driver *drv);
   48.14  void xenbus_unregister_driver(struct xenbus_driver *drv);
   48.15  
   48.16 -struct xenbus_transaction;
   48.17 +typedef u32 xenbus_transaction_t;
   48.18  
   48.19 -char **xenbus_directory(struct xenbus_transaction *t,
   48.20 +char **xenbus_directory(xenbus_transaction_t t,
   48.21  			const char *dir, const char *node, unsigned int *num);
   48.22 -void *xenbus_read(struct xenbus_transaction *t,
   48.23 +void *xenbus_read(xenbus_transaction_t t,
   48.24  		  const char *dir, const char *node, unsigned int *len);
   48.25 -int xenbus_write(struct xenbus_transaction *t,
   48.26 +int xenbus_write(xenbus_transaction_t t,
   48.27  		 const char *dir, const char *node, const char *string);
   48.28 -int xenbus_mkdir(struct xenbus_transaction *t,
   48.29 +int xenbus_mkdir(xenbus_transaction_t t,
   48.30  		 const char *dir, const char *node);
   48.31 -int xenbus_exists(struct xenbus_transaction *t,
   48.32 +int xenbus_exists(xenbus_transaction_t t,
   48.33  		  const char *dir, const char *node);
   48.34 -int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
   48.35 -struct xenbus_transaction *xenbus_transaction_start(void);
   48.36 -int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
   48.37 +int xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node);
   48.38 +int xenbus_transaction_start(xenbus_transaction_t *t);
   48.39 +int xenbus_transaction_end(xenbus_transaction_t t, int abort);
   48.40  
   48.41  /* Single read and scanf: returns -errno or num scanned if > 0. */
   48.42 -int xenbus_scanf(struct xenbus_transaction *t,
   48.43 +int xenbus_scanf(xenbus_transaction_t t,
   48.44  		 const char *dir, const char *node, const char *fmt, ...)
   48.45  	__attribute__((format(scanf, 4, 5)));
   48.46  
   48.47  /* Single printf and write: returns -errno or 0. */
   48.48 -int xenbus_printf(struct xenbus_transaction *t,
   48.49 +int xenbus_printf(xenbus_transaction_t t,
   48.50  		  const char *dir, const char *node, const char *fmt, ...)
   48.51  	__attribute__((format(printf, 4, 5)));
   48.52  
   48.53  /* Generic read function: NULL-terminated triples of name,
   48.54   * sprintf-style type string, and pointer. Returns 0 or errno.*/
   48.55 -int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
   48.56 +int xenbus_gather(xenbus_transaction_t t, const char *dir, ...);
   48.57  
   48.58  /* notifer routines for when the xenstore comes up */
   48.59  int register_xenstore_notifier(struct notifier_block *nb);
   48.60 @@ -194,7 +196,7 @@ int xenbus_watch_path2(struct xenbus_dev
   48.61   * XenbusStateClosing, and the error will be saved in the store.
   48.62   */
   48.63  int xenbus_switch_state(struct xenbus_device *dev,
   48.64 -			struct xenbus_transaction *xbt,
   48.65 +			xenbus_transaction_t xbt,
   48.66  			XenbusState new_state);
   48.67  
   48.68  
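This header is the hub of the changeset: a xenbus transaction is now an opaque u32 id (xenbus_transaction_t) with XBT_NULL meaning "no transaction", rather than a struct pointer with NULL, and xenbus_transaction_start() now returns an error code while handing the id back through a pointer. That is why every NULL transaction argument in the driver hunks above became XBT_NULL, and every `xbt = xenbus_transaction_start(); if (IS_ERR(xbt))` became `err = xenbus_transaction_start(&xbt); if (err)`. A minimal caller sketch against the new API; the retry-on-EAGAIN loop mirrors the connect()/talk_to_backend() functions in this changeset, and the node names and values are placeholders:

    int write_two_nodes(struct xenbus_device *dev)
    {
            xenbus_transaction_t xbt;
            int err;

    again:
            err = xenbus_transaction_start(&xbt);
            if (err)
                    return err;

            err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", 0u);
            if (!err)
                    err = xenbus_printf(xbt, dev->nodename, "state", "%d", 1);
            if (err) {
                    xenbus_transaction_end(xbt, 1);   /* abort */
                    return err;
            }

            err = xenbus_transaction_end(xbt, 0);     /* commit */
            if (err == -EAGAIN)
                    goto again;  /* another writer raced us; retry */
            return err;
    }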
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch	Fri Jan 13 14:12:24 2006 -0600
    49.3 @@ -0,0 +1,43 @@
    49.4 +--- ref-linux-2.6.12/arch/i386/kernel/traps.c	2005-12-19 09:23:44.000000000 +0000
    49.5 ++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c	2006-01-05 15:51:52.000000000 +0000
    49.6 +@@ -521,18 +521,11 @@
    49.7 + 
    49.8 + static void io_check_error(unsigned char reason, struct pt_regs * regs)
    49.9 + {
   49.10 +-	unsigned long i;
   49.11 +-
   49.12 + 	printk("NMI: IOCK error (debug interrupt?)\n");
   49.13 + 	show_registers(regs);
   49.14 + 
   49.15 + 	/* Re-enable the IOCK line, wait for a few seconds */
   49.16 +-	reason = (reason & 0xf) | 8;
   49.17 +-	outb(reason, 0x61);
   49.18 +-	i = 2000;
   49.19 +-	while (--i) udelay(1000);
   49.20 +-	reason &= ~8;
   49.21 +-	outb(reason, 0x61);
   49.22 ++	clear_io_check_error(reason);
   49.23 + }
   49.24 + 
   49.25 + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
   49.26 +--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h	2005-06-17 20:48:29.000000000 +0100
   49.27 ++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h	2006-01-05 15:52:33.000000000 +0000
   49.28 +@@ -15,6 +15,18 @@
   49.29 + 	outb(reason, 0x61);
   49.30 + }
   49.31 + 
   49.32 ++static inline void clear_io_check_error(unsigned char reason)
   49.33 ++{
   49.34 ++	unsigned long i;
   49.35 ++
   49.36 ++	reason = (reason & 0xf) | 8;
   49.37 ++	outb(reason, 0x61);
   49.38 ++	i = 2000;
   49.39 ++	while (--i) udelay(1000);
   49.40 ++	reason &= ~8;
   49.41 ++	outb(reason, 0x61);
   49.42 ++}
   49.43 ++
   49.44 + static inline unsigned char get_nmi_reason(void)
   49.45 + {
   49.46 + 	return inb(0x61);
    50.1 --- a/tools/Makefile	Fri Jan 13 10:38:44 2006 -0600
    50.2 +++ b/tools/Makefile	Fri Jan 13 14:12:24 2006 -0600
    50.3 @@ -12,6 +12,7 @@ SUBDIRS += firmware
    50.4  SUBDIRS += security
    50.5  SUBDIRS += console
    50.6  SUBDIRS += xenmon
    50.7 +SUBDIRS += guest-headers
    50.8  ifeq ($(VTPM_TOOLS),y)
    50.9  SUBDIRS += vtpm_manager
   50.10  SUBDIRS += vtpm
    51.1 --- a/tools/Rules.mk	Fri Jan 13 10:38:44 2006 -0600
    51.2 +++ b/tools/Rules.mk	Fri Jan 13 14:12:24 2006 -0600
    51.3 @@ -35,6 +35,8 @@ mk-symlinks: LINUX_ROOT=$(XEN_ROOT)/linu
    51.4  mk-symlinks:
    51.5  	mkdir -p xen
    51.6  	( cd xen && ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
    51.7 +	mkdir -p xen/hvm
    51.8 +	( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . )
    51.9  	mkdir -p xen/io
   51.10  	( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
   51.11  	mkdir -p xen/linux
    52.1 --- a/tools/console/client/main.c	Fri Jan 13 10:38:44 2006 -0600
    52.2 +++ b/tools/console/client/main.c	Fri Jan 13 14:12:24 2006 -0600
    52.3 @@ -220,7 +220,7 @@ int main(int argc, char **argv)
    52.4  	if (path == NULL)
    52.5  		err(ENOMEM, "realloc");
    52.6  	strcat(path, "/console/tty");
    52.7 -	str_pty = xs_read(xs, NULL, path, &len);
    52.8 +	str_pty = xs_read(xs, XBT_NULL, path, &len);
    52.9  
   52.10  	/* FIXME consoled currently does not assume domain-0 doesn't have a
   52.11  	   console which is good when we break domain-0 up.  To keep us
   52.12 @@ -245,7 +245,7 @@ int main(int argc, char **argv)
   52.13  		struct timeval tv = { 0, 500 };
   52.14  		select(0, NULL, NULL, NULL, &tv); /* pause briefly */
   52.15  
   52.16 -		str_pty = xs_read(xs, NULL, path, &len);
   52.17 +		str_pty = xs_read(xs, XBT_NULL, path, &len);
   52.18  	}
   52.19  
   52.20  	if (str_pty == NULL) {
    53.1 --- a/tools/console/daemon/io.c	Fri Jan 13 10:38:44 2006 -0600
    53.2 +++ b/tools/console/daemon/io.c	Fri Jan 13 14:12:24 2006 -0600
    53.3 @@ -174,7 +174,7 @@ static int domain_create_tty(struct doma
    53.4  		success = asprintf(&path, "%s/limit", dom->conspath) != -1;
    53.5  		if (!success)
    53.6  			goto out;
    53.7 -		data = xs_read(xs, NULL, path, &len);
    53.8 +		data = xs_read(xs, XBT_NULL, path, &len);
    53.9  		if (data) {
   53.10  			dom->buffer.max_capacity = strtoul(data, 0, 0);
   53.11  			free(data);
   53.12 @@ -184,7 +184,7 @@ static int domain_create_tty(struct doma
   53.13  		success = asprintf(&path, "%s/tty", dom->conspath) != -1;
   53.14  		if (!success)
   53.15  			goto out;
   53.16 -		success = xs_write(xs, NULL, path, slave, strlen(slave));
   53.17 +		success = xs_write(xs, XBT_NULL, path, slave, strlen(slave));
   53.18  		free(path);
   53.19  		if (!success)
   53.20  			goto out;
   53.21 @@ -214,7 +214,7 @@ int xs_gather(struct xs_handle *xs, cons
   53.22  		char *p;
   53.23  
   53.24  		asprintf(&path, "%s/%s", dir, name);
   53.25 -		p = xs_read(xs, NULL, path, NULL);
   53.26 +		p = xs_read(xs, XBT_NULL, path, NULL);
   53.27  		free(path);
   53.28  		if (p == NULL) {
   53.29  			ret = ENOENT;
    54.1 --- a/tools/debugger/libxendebug/xendebug.c	Fri Jan 13 10:38:44 2006 -0600
    54.2 +++ b/tools/debugger/libxendebug/xendebug.c	Fri Jan 13 14:12:24 2006 -0600
    54.3 @@ -119,8 +119,8 @@ xendebug_get_context (int xc_handle, uin
    54.4  
    54.5      if ( !ctxt->valid[vcpu] )
    54.6      {
    54.7 -        if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, 
    54.8 -                                              &ctxt->context[vcpu])) )
    54.9 +        if ( (rc = xc_vcpu_getcontext(xc_handle, domid, vcpu, 
   54.10 +                                      &ctxt->context[vcpu])) )
   54.11              return NULL;
   54.12  
   54.13          ctxt->valid[vcpu] = true;
   54.14 @@ -139,10 +139,10 @@ xendebug_set_context (int xc_handle, dom
   54.15          return -EINVAL;
   54.16  
   54.17      op.interface_version = DOM0_INTERFACE_VERSION;
   54.18 -    op.cmd = DOM0_SETDOMAININFO;
   54.19 -    op.u.setdomaininfo.domain = ctxt->domid;
   54.20 -    op.u.setdomaininfo.vcpu = vcpu;
   54.21 -    op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
   54.22 +    op.cmd = DOM0_SETVCPUCONTEXT;
   54.23 +    op.u.setvcpucontext.domain = ctxt->domid;
   54.24 +    op.u.setvcpucontext.vcpu = vcpu;
   54.25 +    op.u.setvcpucontext.ctxt = &ctxt->context[vcpu];
   54.26  
   54.27      if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
   54.28          return rc;
    55.1 --- a/tools/examples/network-bridge	Fri Jan 13 10:38:44 2006 -0600
    55.2 +++ b/tools/examples/network-bridge	Fri Jan 13 14:12:24 2006 -0600
    55.3 @@ -68,48 +68,19 @@ pdev="p${netdev}"
    55.4  vdev="veth${vifnum}"
    55.5  vif0="vif0.${vifnum}"
    55.6  
    55.7 -legacy_mask_to_prefix() {
    55.8 -    mask=$1
    55.9 -    first=${mask%%.*}
   55.10 -    second=${mask#*.}
   55.11 -    third=${second#*.}
   55.12 -    fourth=${third#*.}
   55.13 -    second=${second%%.*}
   55.14 -    third=${third%%.*}
   55.15 -    declare -i INT FULLMASK BIT
   55.16 -    INT=$((((($first*256)+$second)*256+$third)*256+$fourth))
   55.17 -    FULLMASK=4294967295
   55.18 -    BIT=1
   55.19 -    for bit in `seq 32 -1 0`; do
   55.20 -	if test $FULLMASK -eq $INT; then PREFIX=$bit; return; fi
   55.21 -	FULLMASK=$(($FULLMASK-$BIT))
   55.22 -	BIT=$((BIT*2))
   55.23 -    done
   55.24 -    echo "ERROR converting netmask $mask to prefix"
   55.25 -    exit 1
   55.26 +get_ip_info() {
    55.27 +    addr_pfx=`ip addr show dev $1 | egrep '^ *inet ' | sed -e 's/ *inet //' -e 's/ .*//'`
   55.28 +    gateway=`ip route show dev $1 | fgrep default | sed 's/default via //'`
   55.29  }
   55.30 -
   55.31 -parse_kernel_ip() {
   55.32 -    if egrep 'ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:' /proc/cmdline; then
   55.33 -	kip=`sed -e 's!.*ip=\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\):.*!\1!' /proc/cmdline`
   55.34 -	kmask=`sed -e 's!.*ip=[^:]*:[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline` 
   55.35 -	kgate=`sed -e 's!.*ip=[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
   55.36 -    fi
   55.37 -}
   55.38 -
    55.39 +
   55.40  do_ifup() {
   55.41      if ! ifup $1 ; then
   55.42 -        if [ ${kip} ] ; then
   55.43 -            # use the addresses we grocked from /proc/cmdline
   55.44 -            if [ -z "${kmask}" ]; then 
   55.45 -                PREFIX=32
   55.46 -            else 
   55.47 -                legacy_mask_to_prefix ${kmask}
   55.48 -            fi
   55.49 +        if [ ${addr_pfx} ] ; then
   55.50 +            # use the info from get_ip_info()
   55.51              ip addr flush $1
   55.52 -            ip addr add ${kip}/${PREFIX} dev $1
   55.53 +            ip addr add ${addr_pfx} dev $1
   55.54              ip link set dev $1 up
   55.55 -            [ ${kgate} ] && ip route add default via ${kgate}
   55.56 +            [ ${gateway} ] && ip route add default via ${gateway}
   55.57          fi
   55.58      fi
   55.59  }
   55.60 @@ -171,7 +142,7 @@ transfer_routes () {
   55.61  #
   55.62  link_exists()
   55.63  {
   55.64 -    if ip link show "$1" >&/dev/null
   55.65 +    if ip link show "$1" >/dev/null 2>/dev/null
   55.66      then
   55.67          return 0
   55.68      else
   55.69 @@ -231,7 +202,7 @@ show_status () {
   55.70  }
   55.71  
   55.72  op_start () {
   55.73 -    if [ "${bridge}" == "null" ] ; then
   55.74 +    if [ "${bridge}" = "null" ] ; then
   55.75  	return
   55.76      fi
   55.77  
   55.78 @@ -259,9 +230,8 @@ using loopback.nloopbacks=<N> on the dom
   55.79  	preiftransfer ${netdev}
   55.80  	transfer_addrs ${netdev} ${vdev}
   55.81  	if ! ifdown ${netdev}; then
   55.82 -	    # If ifdown fails, take the IP details from the kernel command
   55.83 -	    # line.
   55.84 -	    parse_kernel_ip
   55.85 +	    # If ifdown fails, remember the IP details.
   55.86 +	    get_ip_info ${netdev}
   55.87  	    ip link set ${netdev} down
   55.88  	    ip addr flush ${netdev}
   55.89  	fi
   55.90 @@ -283,13 +253,13 @@ using loopback.nloopbacks=<N> on the dom
   55.91  	transfer_routes ${netdev} ${bridge}
   55.92      fi
   55.93  
   55.94 -    if [ ${antispoof} == 'yes' ] ; then
   55.95 +    if [ ${antispoof} = 'yes' ] ; then
   55.96  	antispoofing
   55.97      fi
   55.98  }
   55.99  
  55.100  op_stop () {
  55.101 -    if [ "${bridge}" == "null" ]; then
  55.102 +    if [ "${bridge}" = "null" ]; then
  55.103  	return
  55.104      fi
  55.105      if ! link_exists "$bridge"; then
  55.106 @@ -301,7 +271,7 @@ op_stop () {
  55.107  	mac=`ip link show ${netdev} | grep 'link\/ether' | sed -e 's/.*ether \(..:..:..:..:..:..\).*/\1/'`
  55.108  	transfer_addrs ${netdev} ${pdev}
  55.109  	if ! ifdown ${netdev}; then
  55.110 -	    parse_kernel_ip
  55.111 +	    get_ip_info ${netdev}
  55.112  	fi
  55.113  	ip link set ${netdev} down arp off
  55.114  	ip link set ${netdev} addr fe:ff:ff:ff:ff:ff
    56.1 --- a/tools/examples/xen-network-common.sh	Fri Jan 13 10:38:44 2006 -0600
    56.2 +++ b/tools/examples/xen-network-common.sh	Fri Jan 13 14:12:24 2006 -0600
    56.3 @@ -42,7 +42,7 @@ then
    56.4    {
    56.5      /sbin/ifup ${HWD_CONFIG_0} $1
    56.6    }
    56.7 -elif ! which ifup >&/dev/null
    56.8 +elif ! which ifup >/dev/null 2>/dev/null
    56.9  then
   56.10    if [ -e /etc/conf.d/net ]
   56.11    then
   56.12 @@ -59,9 +59,18 @@ then
   56.13        /etc/init.d/net.$1 stop
   56.14      }
   56.15    else
   56.16 -    logger -p "daemon.crit" -- \
   56.17 -      "You don't have ifup and don't seem to be running Gentoo either!"
   56.18 -    exit 1
   56.19 +    preiftransfer()
   56.20 +    {
   56.21 +      true
   56.22 +    }
   56.23 +    ifup()
   56.24 +    {
   56.25 +      false
   56.26 +    }
   56.27 +    ifdown()
   56.28 +    {
   56.29 +      false
   56.30 +    }
   56.31    fi
   56.32  else
   56.33    preiftransfer()
    57.1 --- a/tools/examples/xmexample.vmx	Fri Jan 13 10:38:44 2006 -0600
    57.2 +++ b/tools/examples/xmexample.vmx	Fri Jan 13 14:12:24 2006 -0600
    57.3 @@ -28,11 +28,14 @@ name = "ExampleVMXDomain"
    57.4  
    57.5  #-----------------------------------------------------------------------------
    57.6  # the number of cpus guest platform has, default=1
    57.7 -vcpus=1
    57.8 +#vcpus=1
    57.9  
    57.10  # enable/disable vmx guest ACPI, default=0 (disabled)
   57.11  #acpi=0
   57.12  
    57.13 +# enable/disable vmx guest APIC, default=0 (disabled)
   57.14 +#apic=0
   57.15 +
   57.16  # List of which CPUS this domain is allowed to use, default Xen picks
   57.17  #cpus = ""         # leave to Xen to pick
   57.18  #cpus = "0"        # all vcpus run on CPU0
    58.1 --- a/tools/firmware/vmxassist/acpi_madt.c	Fri Jan 13 10:38:44 2006 -0600
    58.2 +++ b/tools/firmware/vmxassist/acpi_madt.c	Fri Jan 13 14:12:24 2006 -0600
    58.3 @@ -17,34 +17,34 @@
    58.4   * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    58.5   * Place - Suite 330, Boston, MA 02111-1307 USA.
    58.6   */
    58.7 +
    58.8  #include "../acpi/acpi2_0.h"
    58.9  #include "../acpi/acpi_madt.h"
   58.10  
   58.11 +#include <xen/hvm/hvm_info_table.h>
   58.12 +
   58.13  #define NULL ((void*)0)
   58.14  
   58.15  extern int puts(const char *s);
   58.16  
   58.17 -#define HVM_INFO_PAGE	0x0009F000
   58.18 -#define HVM_INFO_OFFSET	0x00000800
   58.19 -
   58.20 -struct hvm_info_table {
   58.21 -	char     signature[8]; /* "HVM INFO" */
   58.22 -	uint32_t length;
   58.23 -	uint8_t  checksum;
   58.24 -	uint8_t  acpi_enabled;
   58.25 -	uint8_t  pad[2];
   58.26 -	uint32_t nr_vcpus;
   58.27 -};
   58.28 -
   58.29  static struct hvm_info_table *table = NULL;
   58.30  
   58.31 -static int
   58.32 -checksum_valid(uint8_t *ptr, int len)
   58.33 +static int validate_hvm_info(struct hvm_info_table *t)
   58.34  {
   58.35 -	uint8_t sum=0;
   58.36 +	char signature[] = "HVM INFO";
   58.37 +	uint8_t *ptr = (uint8_t *)t;
   58.38 +	uint8_t sum = 0;
   58.39  	int i;
   58.40  
   58.41 -	for (i = 0; i < len; i++)
   58.42 +	/* strncmp(t->signature, "HVM INFO", 8) */
   58.43 +	for (i = 0; i < 8; i++) {
   58.44 +		if (signature[i] != t->signature[i]) {
   58.45 +			puts("Bad hvm info signature\n");
   58.46 +			return 0;
   58.47 +		}
   58.48 +	}
   58.49 +
   58.50 +	for (i = 0; i < t->length; i++)
   58.51  		sum += ptr[i];
   58.52  
   58.53  	return (sum == 0);
   58.54 @@ -55,24 +55,15 @@ static struct hvm_info_table *
   58.55  get_hvm_info_table(void)
   58.56  {
   58.57  	struct hvm_info_table *t;
   58.58 -	char signature[] = "HVM INFO";
   58.59  	int i;
   58.60  
   58.61  	if (table != NULL)
   58.62  		return table;
   58.63  
   58.64 -	t = (struct hvm_info_table *)(HVM_INFO_PAGE + HVM_INFO_OFFSET);
   58.65 +	t = (struct hvm_info_table *)HVM_INFO_PADDR;
   58.66  
   58.67 -	/* strncmp(t->signature, "HVM INFO", 8) */
   58.68 -	for (i = 0; i < 8; i++) {
   58.69 -		if (signature[i] != t->signature[i]) {
   58.70 -			puts("Bad hvm info signature\n");
   58.71 -			return NULL;
   58.72 -		}
   58.73 -	}
   58.74 -
   58.75 -	if (!checksum_valid((uint8_t *)t, t->length)) {
   58.76 -		puts("Bad hvm info checksum\n");
   58.77 +	if (!validate_hvm_info(t)) {
   58.78 +		puts("Bad hvm info table\n");
   58.79  		return NULL;
   58.80  	}
   58.81  
   58.82 @@ -126,10 +117,10 @@ acpi_madt_get_madt(unsigned char *acpi_s
   58.83  	return madt;
   58.84  }
   58.85  
   58.86 -static void 
   58.87 +static void
   58.88  set_checksum(void *start, int checksum_offset, int len)
   58.89  {
   58.90 -	unsigned char sum = 0;  
   58.91 +	unsigned char sum = 0;
   58.92  	unsigned char *ptr;
   58.93  
   58.94  	ptr = start;
   58.95 @@ -141,9 +132,9 @@ set_checksum(void *start, int checksum_o
   58.96  	ptr[checksum_offset] = -sum;
   58.97  }
   58.98  
   58.99 -static int 
  58.100 +static int
  58.101  acpi_madt_set_local_apics(
  58.102 -	int nr_vcpu, 
  58.103 +	int nr_vcpu,
  58.104  	ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
  58.105  {
  58.106  	int i;
  58.107 @@ -156,14 +147,14 @@ acpi_madt_set_local_apics(
  58.108  		madt->LocalApic[i].Length          = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
  58.109  		madt->LocalApic[i].AcpiProcessorId = i;
  58.110  		madt->LocalApic[i].ApicId          = i;
  58.111 -		madt->LocalApic[i].Flags           = 1; 
  58.112 +		madt->LocalApic[i].Flags           = 1;
  58.113  	}
  58.114  
  58.115  	madt->Header.Header.Length =
  58.116 -		sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) - 
  58.117 +		sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
  58.118  		(MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
  58.119  
  58.120 -	return 0;                            
  58.121 +	return 0;
  58.122  }
  58.123  
  58.124  #define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
  58.125 @@ -185,7 +176,7 @@ int acpi_madt_update(unsigned char *acpi
  58.126  		madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
  58.127  		madt->Header.Header.Length);
  58.128  
  58.129 -	return 0;              
  58.130 +	return 0;
  58.131  }
  58.132  
  58.133  /*
    59.1 --- a/tools/firmware/vmxassist/vm86.h	Fri Jan 13 10:38:44 2006 -0600
    59.2 +++ b/tools/firmware/vmxassist/vm86.h	Fri Jan 13 14:12:24 2006 -0600
    59.3 @@ -24,7 +24,7 @@
    59.4  #include <stdint.h>
    59.5  #endif
    59.6  
    59.7 -#include <xen/vmx_assist.h>
    59.8 +#include <xen/hvm/vmx_assist.h>
    59.9  
   59.10  #define	NR_EXCEPTION_HANDLER	32
   59.11  #define	NR_INTERRUPT_HANDLERS	16
    60.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    60.2 +++ b/tools/guest-headers/Makefile	Fri Jan 13 14:12:24 2006 -0600
    60.3 @@ -0,0 +1,13 @@
    60.4 +
    60.5 +XEN_ROOT=../..
    60.6 +linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse
    60.7 +
    60.8 +all:
    60.9 +
   60.10 +check:
   60.11 +
   60.12 +install:
   60.13 +	mkdir -p $(DESTDIR)/usr/include/xen/linux
   60.14 +	install -m0644 $(linuxsparsetree)/include/asm-xen/linux-public/*.h $(DESTDIR)/usr/include/xen/linux
   60.15 +
   60.16 +clean:
    61.1 --- a/tools/ioemu/hw/i8254.c	Fri Jan 13 10:38:44 2006 -0600
    61.2 +++ b/tools/ioemu/hw/i8254.c	Fri Jan 13 14:12:24 2006 -0600
    61.3 @@ -23,7 +23,7 @@
    61.4   */
    61.5  #include "vl.h"
    61.6  #include <xenctrl.h>
    61.7 -#include <xen/io/ioreq.h>
    61.8 +#include <xen/hvm/ioreq.h>
    61.9  
   61.10  //#define DEBUG_PIT
   61.11  
    62.1 --- a/tools/ioemu/hw/i8259.c	Fri Jan 13 10:38:44 2006 -0600
    62.2 +++ b/tools/ioemu/hw/i8259.c	Fri Jan 13 14:12:24 2006 -0600
    62.3 @@ -23,7 +23,7 @@
    62.4   */
    62.5  #include "vl.h"
    62.6  #include <xenctrl.h>
    62.7 -#include <xen/io/ioreq.h>
    62.8 +#include <xen/hvm/ioreq.h>
    62.9  
   62.10  /* debug PIC */
   62.11  //#define DEBUG_PIC
    63.1 --- a/tools/ioemu/hw/i8259_stub.c	Fri Jan 13 10:38:44 2006 -0600
    63.2 +++ b/tools/ioemu/hw/i8259_stub.c	Fri Jan 13 14:12:24 2006 -0600
    63.3 @@ -22,7 +22,7 @@
    63.4   * THE SOFTWARE.
    63.5   */
    63.6  #include "xenctrl.h"
    63.7 -#include <xen/io/ioreq.h>
    63.8 +#include <xen/hvm/ioreq.h>
    63.9  #include <stdio.h>
   63.10  #include "cpu.h"
   63.11  #include "cpu-all.h"
    64.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Fri Jan 13 10:38:44 2006 -0600
    64.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Fri Jan 13 14:12:24 2006 -0600
    64.3 @@ -48,7 +48,7 @@
    64.4  #include <sys/ioctl.h>
    64.5  
    64.6  #include <xenctrl.h>
    64.7 -#include <xen/io/ioreq.h>
    64.8 +#include <xen/hvm/ioreq.h>
    64.9  #include <xen/linux/evtchn.h>
   64.10  
   64.11  #include "cpu.h"
    65.1 --- a/tools/libxc/xc_core.c	Fri Jan 13 10:38:44 2006 -0600
    65.2 +++ b/tools/libxc/xc_core.c	Fri Jan 13 14:12:24 2006 -0600
    65.3 @@ -55,7 +55,7 @@ xc_domain_dumpcore(int xc_handle,
    65.4      }
    65.5   
    65.6      for (i = 0; i < info.max_vcpu_id; i++)
    65.7 -        if (xc_domain_get_vcpu_context(xc_handle, domid,
    65.8 +        if (xc_vcpu_getcontext(xc_handle, domid,
    65.9                                         i, &ctxt[nr_vcpus]) == 0)
   65.10              nr_vcpus++;
   65.11   
    66.1 --- a/tools/libxc/xc_domain.c	Fri Jan 13 10:38:44 2006 -0600
    66.2 +++ b/tools/libxc/xc_domain.c	Fri Jan 13 14:12:24 2006 -0600
    66.3 @@ -58,16 +58,16 @@ int xc_domain_destroy(int xc_handle,
    66.4      return do_dom0_op(xc_handle, &op);
    66.5  }
    66.6  
    66.7 -int xc_domain_pincpu(int xc_handle,
    66.8 -                     uint32_t domid, 
    66.9 -                     int vcpu,
   66.10 -                     cpumap_t cpumap)
   66.11 +int xc_vcpu_setaffinity(int xc_handle,
   66.12 +                        uint32_t domid, 
   66.13 +                        int vcpu,
   66.14 +                        cpumap_t cpumap)
   66.15  {
   66.16      DECLARE_DOM0_OP;
   66.17 -    op.cmd = DOM0_PINCPUDOMAIN;
   66.18 -    op.u.pincpudomain.domain  = (domid_t)domid;
   66.19 -    op.u.pincpudomain.vcpu    = vcpu;
   66.20 -    op.u.pincpudomain.cpumap  = cpumap;
   66.21 +    op.cmd = DOM0_SETVCPUAFFINITY;
   66.22 +    op.u.setvcpuaffinity.domain  = (domid_t)domid;
   66.23 +    op.u.setvcpuaffinity.vcpu    = vcpu;
   66.24 +    op.u.setvcpuaffinity.cpumap  = cpumap;
   66.25      return do_dom0_op(xc_handle, &op);
   66.26  }
   66.27  
   66.28 @@ -155,7 +155,7 @@ int xc_domain_getinfolist(int xc_handle,
   66.29      return ret;
   66.30  }
   66.31  
   66.32 -int xc_domain_get_vcpu_context(int xc_handle,
   66.33 +int xc_vcpu_getcontext(int xc_handle,
   66.34                                 uint32_t domid,
   66.35                                 uint32_t vcpu,
   66.36                                 vcpu_guest_context_t *ctxt)
   66.37 @@ -345,10 +345,10 @@ int xc_domain_sethandle(int xc_handle, u
   66.38      return do_dom0_op(xc_handle, &op);
   66.39  }
   66.40  
   66.41 -int xc_domain_get_vcpu_info(int xc_handle,
   66.42 -                            uint32_t domid,
   66.43 -                            uint32_t vcpu,
   66.44 -                            xc_vcpuinfo_t *info)
   66.45 +int xc_vcpu_getinfo(int xc_handle,
   66.46 +                    uint32_t domid,
   66.47 +                    uint32_t vcpu,
   66.48 +                    xc_vcpuinfo_t *info)
   66.49  {
   66.50      int rc;
   66.51      DECLARE_DOM0_OP;
   66.52 @@ -380,18 +380,18 @@ int xc_domain_ioport_permission(int xc_h
   66.53      return do_dom0_op(xc_handle, &op);
   66.54  }
   66.55  
   66.56 -int xc_domain_setinfo(int xc_handle,
   66.57 -                      uint32_t domid,
   66.58 -                      uint32_t vcpu,
   66.59 -                      vcpu_guest_context_t *ctxt)
   66.60 +int xc_vcpu_setcontext(int xc_handle,
   66.61 +                       uint32_t domid,
   66.62 +                       uint32_t vcpu,
   66.63 +                       vcpu_guest_context_t *ctxt)
   66.64  {
   66.65      dom0_op_t op;
   66.66      int rc;
   66.67  
   66.68 -    op.cmd = DOM0_SETDOMAININFO;
   66.69 -    op.u.setdomaininfo.domain = domid;
   66.70 -    op.u.setdomaininfo.vcpu = vcpu;
   66.71 -    op.u.setdomaininfo.ctxt = ctxt;
   66.72 +    op.cmd = DOM0_SETVCPUCONTEXT;
   66.73 +    op.u.setvcpucontext.domain = domid;
   66.74 +    op.u.setvcpucontext.vcpu = vcpu;
   66.75 +    op.u.setvcpucontext.ctxt = ctxt;
   66.76  
   66.77      if ( (rc = mlock(ctxt, sizeof(*ctxt))) != 0 )
   66.78          return rc;
    67.1 --- a/tools/libxc/xc_ia64_stubs.c	Fri Jan 13 10:38:44 2006 -0600
    67.2 +++ b/tools/libxc/xc_ia64_stubs.c	Fri Jan 13 14:12:24 2006 -0600
    67.3 @@ -5,7 +5,7 @@
    67.4  #include <stdlib.h>
    67.5  #include <zlib.h>
    67.6  #include "xen/arch-ia64.h"
    67.7 -#include <xen/io/ioreq.h>
    67.8 +#include <xen/hvm/ioreq.h>
    67.9  
   67.10  /* this is a very ugly way of getting FPSR_DEFAULT.  struct ia64_fpreg is
   67.11   * mysteriously declared in two places: /usr/include/asm/fpu.h and
   67.12 @@ -23,7 +23,8 @@ unsigned long xc_ia64_fpsr_default(void)
   67.13  }
   67.14  
   67.15  int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, 
   67.16 -                  uint32_t max_factor, uint32_t flags)
   67.17 +                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   67.18 +                  int (*suspend)(int domid))
   67.19  {
   67.20      PERROR("xc_linux_save not implemented\n");
   67.21      return -1;
   67.22 @@ -664,7 +665,7 @@ int xc_vmx_build(int xc_handle,
   67.23          goto error_out;
   67.24      }
   67.25  
   67.26 -    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){
   67.27 +    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) ){
   67.28          PERROR("Could not get vcpu context");
   67.29          goto error_out;
   67.30      }
   67.31 @@ -688,11 +689,11 @@ int xc_vmx_build(int xc_handle,
   67.32  
   67.33      memset( &launch_op, 0, sizeof(launch_op) );
   67.34  
   67.35 -    launch_op.u.setdomaininfo.domain = (domid_t)domid;
   67.36 -    launch_op.u.setdomaininfo.vcpu   = 0;
   67.37 -    launch_op.u.setdomaininfo.ctxt   = ctxt;
   67.38 +    launch_op.u.setvcpucontext.domain = (domid_t)domid;
   67.39 +    launch_op.u.setvcpucontext.vcpu   = 0;
   67.40 +    launch_op.u.setvcpucontext.ctxt   = ctxt;
   67.41  
   67.42 -    launch_op.cmd = DOM0_SETDOMAININFO;
   67.43 +    launch_op.cmd = DOM0_SETVCPUCONTEXT;
   67.44      rc = do_dom0_op(xc_handle, &launch_op);
   67.45      return rc;
   67.46  
    68.1 --- a/tools/libxc/xc_linux_build.c	Fri Jan 13 10:38:44 2006 -0600
    68.2 +++ b/tools/libxc/xc_linux_build.c	Fri Jan 13 14:12:24 2006 -0600
    68.3 @@ -402,8 +402,11 @@ static int setup_guest(int xc_handle,
    68.4          ctxt->initrd.start    = 0;
    68.5          ctxt->initrd.size     = 0;
    68.6      }
    68.7 -    strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
    68.8 -    ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
    68.9 +    if ( cmdline != NULL )
   68.10 +    {
   68.11 +        strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
   68.12 +        ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
   68.13 +    }
   68.14      munmap(start_info, PAGE_SIZE);
   68.15  
   68.16      free(page_array);
   68.17 @@ -693,8 +696,11 @@ static int setup_guest(int xc_handle,
   68.18          start_info->mod_start    = vinitrd_start;
   68.19          start_info->mod_len      = initrd_len;
   68.20      }
   68.21 -    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
   68.22 -    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
   68.23 +    if ( cmdline != NULL )
   68.24 +    {
   68.25 +        strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
   68.26 +        start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
   68.27 +    }
   68.28      munmap(start_info, PAGE_SIZE);
   68.29  
   68.30      /* shared_info page starts its life empty. */
   68.31 @@ -794,7 +800,7 @@ int xc_linux_build(int xc_handle,
   68.32          goto error_out;
   68.33      }
   68.34  
   68.35 -    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
   68.36 +    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
   68.37      {
   68.38          PERROR("Could not get vcpu context");
   68.39          goto error_out;
   68.40 @@ -897,11 +903,11 @@ int xc_linux_build(int xc_handle,
   68.41  
   68.42      memset( &launch_op, 0, sizeof(launch_op) );
   68.43  
   68.44 -    launch_op.u.setdomaininfo.domain = (domid_t)domid;
   68.45 -    launch_op.u.setdomaininfo.vcpu   = 0;
   68.46 -    launch_op.u.setdomaininfo.ctxt   = ctxt;
   68.47 +    launch_op.u.setvcpucontext.domain = (domid_t)domid;
   68.48 +    launch_op.u.setvcpucontext.vcpu   = 0;
   68.49 +    launch_op.u.setvcpucontext.ctxt   = ctxt;
   68.50  
   68.51 -    launch_op.cmd = DOM0_SETDOMAININFO;
   68.52 +    launch_op.cmd = DOM0_SETVCPUCONTEXT;
   68.53      rc = xc_dom0_op(xc_handle, &launch_op);
   68.54      
   68.55      return rc;
    69.1 --- a/tools/libxc/xc_linux_restore.c	Fri Jan 13 10:38:44 2006 -0600
    69.2 +++ b/tools/libxc/xc_linux_restore.c	Fri Jan 13 14:12:24 2006 -0600
    69.3 @@ -171,7 +171,7 @@ int xc_linux_restore(int xc_handle, int 
    69.4  
    69.5  
    69.6      /* Only have to worry about vcpu 0 even for SMP */
    69.7 -    if (xc_domain_get_vcpu_context( xc_handle, dom, 0, &ctxt)) {
    69.8 +    if (xc_vcpu_getcontext( xc_handle, dom, 0, &ctxt)) {
    69.9          ERR("Could not get vcpu context");
   69.10          goto out;
   69.11      }
   69.12 @@ -735,10 +735,10 @@ int xc_linux_restore(int xc_handle, int 
   69.13  
   69.14      DPRINTF("Domain ready to be built.\n");
   69.15  
   69.16 -    op.cmd = DOM0_SETDOMAININFO;
   69.17 -    op.u.setdomaininfo.domain = (domid_t)dom;
   69.18 -    op.u.setdomaininfo.vcpu   = 0;
   69.19 -    op.u.setdomaininfo.ctxt   = &ctxt;
   69.20 +    op.cmd = DOM0_SETVCPUCONTEXT;
   69.21 +    op.u.setvcpucontext.domain = (domid_t)dom;
   69.22 +    op.u.setvcpucontext.vcpu   = 0;
   69.23 +    op.u.setvcpucontext.ctxt   = &ctxt;
   69.24      rc = xc_dom0_op(xc_handle, &op);
   69.25  
   69.26      if (rc != 0) {
    70.1 --- a/tools/libxc/xc_linux_save.c	Fri Jan 13 10:38:44 2006 -0600
    70.2 +++ b/tools/libxc/xc_linux_save.c	Fri Jan 13 14:12:24 2006 -0600
    70.3 @@ -357,21 +357,14 @@ static int analysis_phase(int xc_handle,
    70.4  }
    70.5  
    70.6  
    70.7 -static int suspend_and_state(int xc_handle, int io_fd, int dom,       
    70.8 -                             xc_dominfo_t *info,
    70.9 +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
   70.10 +                             int dom, xc_dominfo_t *info,
   70.11                               vcpu_guest_context_t *ctxt)
   70.12  {
   70.13      int i = 0;
   70.14 -    char ans[30];
   70.15  
   70.16 -    printf("suspend\n");
   70.17 -    fflush(stdout);
   70.18 -    if (fgets(ans, sizeof(ans), stdin) == NULL) {
   70.19 -        ERR("failed reading suspend reply");
   70.20 -        return -1;
   70.21 -    }
   70.22 -    if (strncmp(ans, "done\n", 5)) {
   70.23 -        ERR("suspend reply incorrect: %s", ans);
   70.24 +    if (!(*suspend)(dom)) {
   70.25 +        ERR("Suspend request failed");
   70.26          return -1;
   70.27      }
   70.28  
   70.29 @@ -382,7 +375,7 @@ static int suspend_and_state(int xc_hand
   70.30          return -1;
   70.31      }
   70.32  
   70.33 -    if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, ctxt)) 
   70.34 +    if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) 
   70.35          ERR("Could not get vcpu context");
   70.36  
   70.37  
   70.38 @@ -568,7 +561,7 @@ static unsigned long *xc_map_m2p(int xc_
   70.39  
   70.40  
   70.41  int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, 
   70.42 -                  uint32_t max_factor, uint32_t flags)
   70.43 +                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
   70.44  {
   70.45      xc_dominfo_t info;
   70.46  
   70.47 @@ -643,7 +636,7 @@ int xc_linux_save(int xc_handle, int io_
   70.48      }
   70.49      
   70.50      /* Only have to worry about vcpu 0 even for SMP */
   70.51 -    if (xc_domain_get_vcpu_context(xc_handle, dom, 0, &ctxt)) {
   70.52 +    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
   70.53          ERR("Could not get vcpu context");
   70.54          goto out;
   70.55      }
   70.56 @@ -748,7 +741,7 @@ int xc_linux_save(int xc_handle, int io_
   70.57          
   70.58          last_iter = 1;
   70.59          
   70.60 -        if (suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt)) {
   70.61 +        if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
   70.62              ERR("Domain appears not to have suspended");
   70.63              goto out;
   70.64          }
   70.65 @@ -1054,7 +1047,8 @@ int xc_linux_save(int xc_handle, int io_
   70.66                  DPRINTF("Start last iteration\n");
   70.67                  last_iter = 1;
   70.68                  
   70.69 -                if (suspend_and_state(xc_handle, io_fd, dom, &info, &ctxt)) {
   70.70 +                if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
   70.71 +                                      &ctxt)) {
   70.72                      ERR("Domain appears not to have suspended");
   70.73                      goto out;
   70.74                  }
   70.75 @@ -1164,6 +1158,9 @@ int xc_linux_save(int xc_handle, int io_
   70.76      if (live_shinfo)
   70.77          munmap(live_shinfo, PAGE_SIZE);
   70.78      
   70.79 +    if (live_p2m_frame_list_list) 
   70.80 +        munmap(live_p2m_frame_list_list, PAGE_SIZE); 
   70.81 +
   70.82      if (live_p2m_frame_list) 
   70.83          munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); 
   70.84  
    71.1 --- a/tools/libxc/xc_pagetab.c	Fri Jan 13 10:38:44 2006 -0600
    71.2 +++ b/tools/libxc/xc_pagetab.c	Fri Jan 13 14:12:24 2006 -0600
    71.3 @@ -74,7 +74,7 @@ unsigned long xc_translate_foreign_addre
    71.4  #define pt_levels 4
    71.5  #endif
    71.6  
    71.7 -    if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
    71.8 +    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
    71.9          fprintf(stderr, "failed to retreive vcpu context\n");
   71.10          goto out;
   71.11      }
    72.1 --- a/tools/libxc/xc_ptrace.c	Fri Jan 13 10:38:44 2006 -0600
    72.2 +++ b/tools/libxc/xc_ptrace.c	Fri Jan 13 14:12:24 2006 -0600
    72.3 @@ -33,7 +33,7 @@ fetch_regs(int xc_handle, int cpu, int *
    72.4      if (online)
    72.5          *online = 0;
    72.6      if ( !(regs_valid & (1 << cpu)) ) { 
    72.7 -        retval = xc_domain_get_vcpu_context(xc_handle, current_domid, 
    72.8 +        retval = xc_vcpu_getcontext(xc_handle, current_domid, 
    72.9  						cpu, &ctxt[cpu]);
   72.10          if ( retval ) 
   72.11              goto done;
   72.12 @@ -43,8 +43,7 @@ fetch_regs(int xc_handle, int cpu, int *
   72.13  	if ( online == NULL )
   72.14  	    goto done;
   72.15  
   72.16 -	retval = xc_domain_get_vcpu_info(xc_handle, current_domid,
   72.17 -					 cpu, &info);
   72.18 +	retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info);
   72.19  	*online = info.online;
   72.20      
   72.21   done:
   72.22 @@ -395,7 +394,7 @@ xc_ptrace(
   72.23  
   72.24      case PTRACE_SETREGS:
   72.25          SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs);
   72.26 -        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
   72.27 +        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
   72.28          if (retval)
   72.29              goto error_out;
   72.30          break;
   72.31 @@ -405,7 +404,7 @@ xc_ptrace(
   72.32           *  during single-stepping - but that just seems retarded
   72.33           */
   72.34          ctxt[cpu].user_regs.eflags |= PSL_T; 
   72.35 -        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
   72.36 +        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
   72.37          if ( retval )
   72.38          {
   72.39              perror("dom0 op failed");
   72.40 @@ -423,8 +422,8 @@ xc_ptrace(
   72.41                  /* Clear trace flag */
   72.42                  if ( ctxt[cpu].user_regs.eflags & PSL_T ) {
   72.43                      ctxt[cpu].user_regs.eflags &= ~PSL_T;
   72.44 -                    retval = xc_domain_setinfo(xc_handle, current_domid, 
   72.45 -                                               cpu, &ctxt[cpu]);
   72.46 +                    retval = xc_vcpu_setcontext(xc_handle, current_domid, 
   72.47 +                                                cpu, &ctxt[cpu]);
   72.48                      if ( retval ) {
   72.49                          perror("dom0 op failed");
   72.50                          goto error_out;
    73.1 --- a/tools/libxc/xc_vmx_build.c	Fri Jan 13 10:38:44 2006 -0600
    73.2 +++ b/tools/libxc/xc_vmx_build.c	Fri Jan 13 14:12:24 2006 -0600
    73.3 @@ -9,7 +9,8 @@
    73.4  #include <stdlib.h>
    73.5  #include <unistd.h>
    73.6  #include <zlib.h>
    73.7 -#include <xen/io/ioreq.h>
    73.8 +#include <xen/hvm/hvm_info_table.h>
    73.9 +#include <xen/hvm/ioreq.h>
   73.10  
   73.11  #define VMX_LOADER_ENTR_ADDR  0x00100000
   73.12  
   73.13 @@ -33,18 +34,6 @@
   73.14  #define E820_MAP_NR_OFFSET  0x000001E8
   73.15  #define E820_MAP_OFFSET     0x000002D0
   73.16  
   73.17 -#define HVM_INFO_PAGE        0x0009F000
   73.18 -#define HVM_INFO_OFFSET      0x00000800
   73.19 -
   73.20 -struct hvm_info_table {
   73.21 -    char     signature[8]; /* "HVM INFO" */
   73.22 -    uint32_t length;
   73.23 -    uint8_t  checksum;
   73.24 -    uint8_t  acpi_enabled;
   73.25 -    uint8_t  pad[2];
   73.26 -    uint32_t nr_vcpus;
   73.27 -};
   73.28 -
   73.29  struct e820entry {
   73.30      uint64_t addr;
   73.31      uint64_t size;
   73.32 @@ -128,7 +117,7 @@ static unsigned char build_e820map(void 
   73.33      return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
   73.34  }
   73.35  
   73.36 -static void 
   73.37 +static void
   73.38  set_hvm_info_checksum(struct hvm_info_table *t)
   73.39  {
   73.40      uint8_t *ptr = (uint8_t *)t, sum = 0;
   73.41 @@ -148,14 +137,18 @@ set_hvm_info_checksum(struct hvm_info_ta
   73.42   */
   73.43  static int set_hvm_info(int xc_handle, uint32_t dom,
   73.44                          unsigned long *pfn_list, unsigned int vcpus,
   73.45 -                        unsigned int acpi)
   73.46 +                        unsigned int acpi, unsigned int apic)
   73.47  {
   73.48      char *va_map;
   73.49      struct hvm_info_table *va_hvm;
   73.50  
   73.51 -    va_map = xc_map_foreign_range(xc_handle, dom,
   73.52 -                                  PAGE_SIZE, PROT_READ|PROT_WRITE,
   73.53 -                                  pfn_list[HVM_INFO_PAGE >> PAGE_SHIFT]);
   73.54 +    va_map = xc_map_foreign_range(
   73.55 +        xc_handle,
   73.56 +        dom,
   73.57 +        PAGE_SIZE,
   73.58 +        PROT_READ|PROT_WRITE,
   73.59 +        pfn_list[HVM_INFO_PFN]);
    73.60 +
   73.61      if ( va_map == NULL )
   73.62          return -1;
   73.63  
   73.64 @@ -164,8 +157,9 @@ static int set_hvm_info(int xc_handle, u
   73.65      strncpy(va_hvm->signature, "HVM INFO", 8);
   73.66      va_hvm->length       = sizeof(struct hvm_info_table);
   73.67      va_hvm->acpi_enabled = acpi;
   73.68 +    va_hvm->apic_enabled = apic;
   73.69      va_hvm->nr_vcpus     = vcpus;
   73.70 -    
   73.71 +
   73.72      set_hvm_info_checksum(va_hvm);
   73.73  
   73.74      munmap(va_map, PAGE_SIZE);
   73.75 @@ -307,9 +301,9 @@ static int setup_guest(int xc_handle,
   73.76                         vcpu_guest_context_t *ctxt,
   73.77                         unsigned long shared_info_frame,
   73.78                         unsigned int control_evtchn,
   73.79 -                       unsigned int lapic,
   73.80                         unsigned int vcpus,
   73.81                         unsigned int acpi,
   73.82 +                       unsigned int apic,
   73.83                         unsigned int store_evtchn,
   73.84                         unsigned long *store_mfn)
   73.85  {
   73.86 @@ -519,20 +513,14 @@ static int setup_guest(int xc_handle,
   73.87              goto error_out;
   73.88      }
   73.89  
   73.90 -    if (set_hvm_info(xc_handle, dom, page_array, vcpus, acpi)) {
   73.91 +    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
   73.92          fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
   73.93          goto error_out;
   73.94      }
   73.95  
   73.96 -    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
   73.97 -    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
   73.98 -        goto error_out;
   73.99 -
  73.100 -    shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;
  73.101 -
  73.102 -    if ((e820_page = xc_map_foreign_range(
  73.103 -        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.104 -        page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
  73.105 +    if ( (e820_page = xc_map_foreign_range(
  73.106 +         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.107 +         page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
  73.108          goto error_out;
  73.109      memset(e820_page, 0, PAGE_SIZE);
  73.110      e820_map_nr = build_e820map(e820_page, v_end);
  73.111 @@ -547,26 +535,30 @@ static int setup_guest(int xc_handle,
  73.112      munmap(e820_page, PAGE_SIZE);
  73.113  
  73.114      /* shared_info page starts its life empty. */
  73.115 -    if ((shared_info = xc_map_foreign_range(
  73.116 -        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.117 -        shared_info_frame)) == 0)
  73.118 +    if ( (shared_info = xc_map_foreign_range(
  73.119 +         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.120 +         shared_info_frame)) == 0 )
  73.121          goto error_out;
  73.122      memset(shared_info, 0, sizeof(shared_info_t));
  73.123      /* Mask all upcalls... */
  73.124      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  73.125          shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
  73.126 -
  73.127      munmap(shared_info, PAGE_SIZE);
  73.128  
  73.129      /* Populate the event channel port in the shared page */
  73.130 -    if ((sp = (shared_iopage_t *) xc_map_foreign_range(
  73.131 -        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.132 -        page_array[shared_page_frame])) == 0)
  73.133 +    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
  73.134 +    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
  73.135 +         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
  73.136 +         shared_page_frame)) == 0 )
  73.137          goto error_out;
  73.138      memset(sp, 0, PAGE_SIZE);
  73.139      sp->sp_global.eport = control_evtchn;
  73.140      munmap(sp, PAGE_SIZE);
  73.141  
  73.142 +    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
  73.143 +    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
  73.144 +        goto error_out;
  73.145 +
  73.146      /* Send the page update requests down to the hypervisor. */
  73.147      if ( xc_finish_mmu_updates(xc_handle, mmu) )
  73.148          goto error_out;
  73.149 @@ -588,7 +580,7 @@ static int setup_guest(int xc_handle,
  73.150      ctxt->user_regs.eax = 0;
  73.151      ctxt->user_regs.esp = 0;
  73.152      ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
  73.153 -    ctxt->user_regs.ecx = lapic;
  73.154 +    ctxt->user_regs.ecx = 0;
  73.155      ctxt->user_regs.esi = 0;
  73.156      ctxt->user_regs.edi = 0;
  73.157      ctxt->user_regs.ebp = 0;
  73.158 @@ -608,9 +600,9 @@ int xc_vmx_build(int xc_handle,
  73.159                   int memsize,
  73.160                   const char *image_name,
  73.161                   unsigned int control_evtchn,
  73.162 -                 unsigned int lapic,
  73.163                   unsigned int vcpus,
  73.164                   unsigned int acpi,
  73.165 +                 unsigned int apic,
  73.166                   unsigned int store_evtchn,
  73.167                   unsigned long *store_mfn)
  73.168  {
  73.169 @@ -659,7 +651,7 @@ int xc_vmx_build(int xc_handle,
  73.170          goto error_out;
  73.171      }
  73.172  
  73.173 -    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
  73.174 +    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
  73.175      {
  73.176          PERROR("Could not get vcpu context");
  73.177          goto error_out;
  73.178 @@ -674,7 +666,7 @@ int xc_vmx_build(int xc_handle,
  73.179  
  73.180      if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
  73.181                       ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
  73.182 -                     lapic, vcpus, acpi, store_evtchn, store_mfn) < 0)
  73.183 +                     vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
  73.184      {
  73.185          ERROR("Error constructing guest OS");
  73.186          goto error_out;
  73.187 @@ -716,11 +708,11 @@ int xc_vmx_build(int xc_handle,
  73.188  
  73.189      memset( &launch_op, 0, sizeof(launch_op) );
  73.190  
  73.191 -    launch_op.u.setdomaininfo.domain = (domid_t)domid;
  73.192 -    launch_op.u.setdomaininfo.vcpu   = 0;
  73.193 -    launch_op.u.setdomaininfo.ctxt   = ctxt;
  73.194 +    launch_op.u.setvcpucontext.domain = (domid_t)domid;
  73.195 +    launch_op.u.setvcpucontext.vcpu   = 0;
  73.196 +    launch_op.u.setvcpucontext.ctxt   = ctxt;
  73.197  
  73.198 -    launch_op.cmd = DOM0_SETDOMAININFO;
  73.199 +    launch_op.cmd = DOM0_SETVCPUCONTEXT;
  73.200      rc = xc_dom0_op(xc_handle, &launch_op);
  73.201  
  73.202      return rc;
    74.1 --- a/tools/libxc/xenctrl.h	Fri Jan 13 10:38:44 2006 -0600
    74.2 +++ b/tools/libxc/xenctrl.h	Fri Jan 13 14:12:24 2006 -0600
    74.3 @@ -181,10 +181,11 @@ int xc_domain_unpause(int xc_handle,
    74.4   */
    74.5  int xc_domain_destroy(int xc_handle, 
    74.6                        uint32_t domid);
    74.7 -int xc_domain_pincpu(int xc_handle,
    74.8 -                     uint32_t domid,
    74.9 -                     int vcpu,
   74.10 -                     cpumap_t cpumap);
   74.11 +
   74.12 +int xc_vcpu_setaffinity(int xc_handle,
   74.13 +                        uint32_t domid,
   74.14 +                        int vcpu,
   74.15 +                        cpumap_t cpumap);
   74.16  
   74.17  /**
   74.18   * This function will return information about one or more domains. It is
   74.19 @@ -208,7 +209,7 @@ int xc_domain_getinfo(int xc_handle,
   74.20  
   74.21  
   74.22  /**
   74.23 - * This function will set the vcpu context for the specified domain.
   74.24 + * This function will set the execution context for the specified vcpu.
   74.25   *
   74.26   * @parm xc_handle a handle to an open hypervisor interface
   74.27   * @parm domid the domain to set the vcpu context for
   74.28 @@ -216,10 +217,10 @@ int xc_domain_getinfo(int xc_handle,
   74.29   * @parm ctxt pointer to the the cpu context with the values to set
   74.30   * @return the number of domains enumerated or -1 on error
   74.31   */
   74.32 -int xc_domain_setinfo(int xc_handle,
   74.33 -                      uint32_t domid,
   74.34 -                      uint32_t vcpu,
   74.35 -                      vcpu_guest_context_t *ctxt);
   74.36 +int xc_vcpu_setcontext(int xc_handle,
   74.37 +                       uint32_t domid,
   74.38 +                       uint32_t vcpu,
   74.39 +                       vcpu_guest_context_t *ctxt);
   74.40  /**
   74.41   * This function will return information about one or more domains, using a
   74.42   * single hypercall.  The domain information will be stored into the supplied
   74.43 @@ -249,17 +250,16 @@ int xc_domain_getinfolist(int xc_handle,
   74.44   *            domain
   74.45   * @return 0 on success, -1 on failure
   74.46   */
   74.47 -int xc_domain_get_vcpu_context(int xc_handle,
   74.48 +int xc_vcpu_getcontext(int xc_handle,
   74.49                                 uint32_t domid,
   74.50                                 uint32_t vcpu,
   74.51                                 vcpu_guest_context_t *ctxt);
   74.52  
   74.53  typedef dom0_getvcpuinfo_t xc_vcpuinfo_t;
   74.54 -int xc_domain_get_vcpu_info(int xc_handle,
   74.55 -                            uint32_t domid,
   74.56 -                            uint32_t vcpu,
   74.57 -                            xc_vcpuinfo_t *info);
   74.58 -
   74.59 +int xc_vcpu_getinfo(int xc_handle,
   74.60 +                    uint32_t domid,
   74.61 +                    uint32_t vcpu,
   74.62 +                    xc_vcpuinfo_t *info);
   74.63  
   74.64  int xc_domain_setcpuweight(int xc_handle,
   74.65                             uint32_t domid,
    75.1 --- a/tools/libxc/xenguest.h	Fri Jan 13 10:38:44 2006 -0600
    75.2 +++ b/tools/libxc/xenguest.h	Fri Jan 13 14:12:24 2006 -0600
    75.3 @@ -21,8 +21,10 @@
    75.4   * @parm dom the id of the domain
    75.5   * @return 0 on success, -1 on failure
    75.6   */
    75.7 -int xc_linux_save(int xc_handle, int fd, uint32_t dom, uint32_t max_iters, 
    75.8 -                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */);
    75.9 +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, 
   75.10 +                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
   75.11 +                  int (*suspend)(int domid));
   75.12 +
   75.13  
   75.14  /**
   75.15   * This function will restore a saved domain running Linux.
   75.16 @@ -56,9 +58,9 @@ int xc_vmx_build(int xc_handle,
   75.17                   int memsize,
   75.18                   const char *image_name,
   75.19                   unsigned int control_evtchn,
   75.20 -                 unsigned int lapic,
   75.21                   unsigned int vcpus,
   75.22                   unsigned int acpi,
   75.23 +                 unsigned int apic,
   75.24                   unsigned int store_evtchn,
   75.25                   unsigned long *store_mfn);
   75.26  
    76.1 --- a/tools/libxc/xg_private.c	Fri Jan 13 10:38:44 2006 -0600
    76.2 +++ b/tools/libxc/xg_private.c	Fri Jan 13 14:12:24 2006 -0600
    76.3 @@ -17,6 +17,9 @@ char *xc_read_kernel_image(const char *f
    76.4      char *image = NULL;
    76.5      unsigned int bytes;
    76.6  
    76.7 +    if ( filename == NULL )
    76.8 +        goto out;
    76.9 +
   76.10      if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
   76.11      {
   76.12          PERROR("Could not open kernel image");
    77.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Fri Jan 13 10:38:44 2006 -0600
    77.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Fri Jan 13 14:12:24 2006 -0600
    77.3 @@ -16,7 +16,7 @@
    77.4  #include <netdb.h>
    77.5  #include <arpa/inet.h>
    77.6  
    77.7 -#include "xc_private.h"
    77.8 +#include "xenctrl.h"
    77.9  
   77.10  /* Needed for Python versions earlier than 2.3. */
   77.11  #ifndef PyMODINIT_FUNC
   77.12 @@ -135,9 +135,9 @@ static PyObject *pyxc_domain_destroy(XcO
   77.13  }
   77.14  
   77.15  
   77.16 -static PyObject *pyxc_domain_pincpu(XcObject *self,
   77.17 -                                    PyObject *args,
   77.18 -                                    PyObject *kwds)
   77.19 +static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
   77.20 +                                       PyObject *args,
   77.21 +                                       PyObject *kwds)
   77.22  {
   77.23      uint32_t dom;
   77.24      int vcpu = 0, i;
   77.25 @@ -157,7 +157,7 @@ static PyObject *pyxc_domain_pincpu(XcOb
   77.26              cpumap |= (cpumap_t)1 << PyInt_AsLong(PyList_GetItem(cpulist, i));
   77.27      }
   77.28    
   77.29 -    if ( xc_domain_pincpu(self->xc_handle, dom, vcpu, cpumap) != 0 )
   77.30 +    if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap) != 0 )
   77.31          return PyErr_SetFromErrno(xc_error);
   77.32      
   77.33      Py_INCREF(zero);
   77.34 @@ -297,7 +297,7 @@ static PyObject *pyxc_vcpu_getinfo(XcObj
   77.35                                        &dom, &vcpu) )
   77.36          return NULL;
   77.37  
   77.38 -    rc = xc_domain_get_vcpu_info(self->xc_handle, dom, vcpu, &info);
   77.39 +    rc = xc_vcpu_getinfo(self->xc_handle, dom, vcpu, &info);
   77.40      if ( rc < 0 )
   77.41          return PyErr_SetFromErrno(xc_error);
   77.42  
   77.43 @@ -362,22 +362,23 @@ static PyObject *pyxc_vmx_build(XcObject
   77.44      uint32_t dom;
   77.45      char *image;
   77.46      int control_evtchn, store_evtchn;
   77.47 +    int memsize;
   77.48      int vcpus = 1;
   77.49 -    int lapic = 0;
   77.50      int acpi = 0;
   77.51 -    int memsize;
   77.52 +    int apic = 0;
   77.53      unsigned long store_mfn = 0;
   77.54  
   77.55      static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
   77.56 -                                "memsize", "image", "lapic", "vcpus", "acpi",NULL };
   77.57 +                                "memsize", "image", "vcpus", "acpi", "apic",
   77.58 +                                NULL };
   77.59  
   77.60      if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
   77.61                                        &dom, &control_evtchn, &store_evtchn,
   77.62 -                                      &memsize, &image, &lapic, &vcpus,&acpi) )
   77.63 +                                      &memsize, &image, &vcpus, &acpi, &apic) )
   77.64          return NULL;
   77.65  
   77.66      if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
   77.67 -                      lapic, vcpus, acpi, store_evtchn, &store_mfn) != 0 )
   77.68 +                      vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
   77.69          return PyErr_SetFromErrno(xc_error);
   77.70  
   77.71      return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
   77.72 @@ -889,8 +890,8 @@ static PyMethodDef pyxc_methods[] = {
   77.73        " dom [int]:    Identifier of domain to be destroyed.\n\n"
   77.74        "Returns: [int] 0 on success; -1 on error.\n" },
   77.75  
   77.76 -    { "domain_pincpu", 
   77.77 -      (PyCFunction)pyxc_domain_pincpu, 
   77.78 +    { "vcpu_setaffinity", 
   77.79 +      (PyCFunction)pyxc_vcpu_setaffinity, 
   77.80        METH_VARARGS | METH_KEYWORDS, "\n"
   77.81        "Pin a VCPU to a specified set CPUs.\n"
   77.82        " dom [int]:     Identifier of domain to which VCPU belongs.\n"
    78.1 --- a/tools/python/xen/lowlevel/xs/xs.c	Fri Jan 13 10:38:44 2006 -0600
    78.2 +++ b/tools/python/xen/lowlevel/xs/xs.c	Fri Jan 13 14:12:24 2006 -0600
    78.3 @@ -66,7 +66,7 @@ static PyObject *none(bool result);
    78.4  
    78.5  static int parse_transaction_path(XsHandle *self, PyObject *args,
    78.6                                    struct xs_handle **xh,
    78.7 -                                  struct xs_transaction_handle **th,
    78.8 +                                  xs_transaction_t *th,
    78.9                                    char **path);
   78.10  
   78.11  
   78.12 @@ -83,7 +83,7 @@ static int parse_transaction_path(XsHand
   78.13  static PyObject *xspy_read(XsHandle *self, PyObject *args)
   78.14  {
   78.15      struct xs_handle *xh;
   78.16 -    struct xs_transaction_handle *th;
   78.17 +    xs_transaction_t th;
   78.18      char *path;
   78.19  
   78.20      char *xsval;
   78.21 @@ -120,7 +120,7 @@ static PyObject *xspy_write(XsHandle *se
   78.22  {
   78.23      static char *arg_spec = "sss#";
   78.24      struct xs_handle *xh = xshandle(self);
   78.25 -    struct xs_transaction_handle *th;
   78.26 +    xs_transaction_t th;
   78.27      char *thstr;
   78.28      char *path;
   78.29      char *data;
   78.30 @@ -132,7 +132,7 @@ static PyObject *xspy_write(XsHandle *se
   78.31      if (!PyArg_ParseTuple(args, arg_spec, &thstr, &path, &data, &data_n))
   78.32          return NULL;
   78.33  
   78.34 -    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
   78.35 +    th = strtoul(thstr, NULL, 16);
   78.36  
   78.37      Py_BEGIN_ALLOW_THREADS
   78.38      result = xs_write(xh, th, path, data, data_n);
   78.39 @@ -155,7 +155,7 @@ static PyObject *xspy_write(XsHandle *se
   78.40  static PyObject *xspy_ls(XsHandle *self, PyObject *args)
   78.41  {
   78.42      struct xs_handle *xh;
   78.43 -    struct xs_transaction_handle *th;
   78.44 +    xs_transaction_t th;
   78.45      char *path;
   78.46  
   78.47      char **xsval;
   78.48 @@ -193,7 +193,7 @@ static PyObject *xspy_ls(XsHandle *self,
   78.49  static PyObject *xspy_mkdir(XsHandle *self, PyObject *args)
   78.50  {
   78.51      struct xs_handle *xh;
   78.52 -    struct xs_transaction_handle *th;
   78.53 +    xs_transaction_t th;
   78.54      char *path;
   78.55  
   78.56      bool result;
   78.57 @@ -221,7 +221,7 @@ static PyObject *xspy_mkdir(XsHandle *se
   78.58  static PyObject *xspy_rm(XsHandle *self, PyObject *args)
   78.59  {
   78.60      struct xs_handle *xh;
   78.61 -    struct xs_transaction_handle *th;
   78.62 +    xs_transaction_t th;
   78.63      char *path;
   78.64  
   78.65      bool result;
   78.66 @@ -256,7 +256,7 @@ static PyObject *xspy_get_permissions(Xs
   78.67      unsigned int perms_n = 0;
   78.68      int i;
   78.69  
   78.70 -    struct xs_transaction_handle *th;
   78.71 +    xs_transaction_t th;
   78.72      char *thstr;
   78.73  
   78.74      if (!xh)
   78.75 @@ -264,7 +264,7 @@ static PyObject *xspy_get_permissions(Xs
   78.76      if (!PyArg_ParseTuple(args, arg_spec, &thstr, &path))
   78.77          return NULL;
   78.78  
   78.79 -    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
   78.80 +    th = strtoul(thstr, NULL, 16);
   78.81  
   78.82      Py_BEGIN_ALLOW_THREADS
   78.83      perms = xs_get_permissions(xh, th, path, &perms_n);
   78.84 @@ -312,7 +312,7 @@ static PyObject *xspy_set_permissions(Xs
   78.85      int xsperms_n;
   78.86      PyObject *tuple0 = NULL;
   78.87  
   78.88 -    struct xs_transaction_handle *th;
   78.89 +    xs_transaction_t th;
   78.90      char *thstr;
   78.91  
   78.92      if (!xh)
   78.93 @@ -320,7 +320,7 @@ static PyObject *xspy_set_permissions(Xs
   78.94      if (!PyArg_ParseTuple(args, "ssO", &thstr, &path, &perms))
   78.95          goto exit;
   78.96  
   78.97 -    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
   78.98 +    th = strtoul(thstr, NULL, 16);
   78.99  
  78.100      if (!PyList_Check(perms)) {
  78.101          PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
  78.102 @@ -509,7 +509,7 @@ static PyObject *xspy_unwatch(XsHandle *
  78.103  static PyObject *xspy_transaction_start(XsHandle *self)
  78.104  {
  78.105      struct xs_handle *xh = xshandle(self);
  78.106 -    struct xs_transaction_handle *th;
  78.107 +    xs_transaction_t th;
  78.108      char thstr[MAX_STRLEN(unsigned long) + 1];
  78.109  
  78.110      if (!xh)
  78.111 @@ -519,7 +519,7 @@ static PyObject *xspy_transaction_start(
  78.112      th = xs_transaction_start(xh);
  78.113      Py_END_ALLOW_THREADS
  78.114  
  78.115 -    if (th == NULL) {
  78.116 +    if (th == XBT_NULL) {
  78.117          PyErr_SetFromErrno(PyExc_RuntimeError);
  78.118          return NULL;
  78.119      }
  78.120 @@ -547,7 +547,7 @@ static PyObject *xspy_transaction_end(Xs
  78.121      struct xs_handle *xh = xshandle(self);
  78.122      bool result;
  78.123  
  78.124 -    struct xs_transaction_handle *th;
  78.125 +    xs_transaction_t th;
  78.126      char *thstr;
  78.127  
  78.128      if (!xh)
  78.129 @@ -556,7 +556,7 @@ static PyObject *xspy_transaction_end(Xs
  78.130                                       &thstr, &abort))
  78.131          return NULL;
  78.132  
  78.133 -    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
  78.134 +    th = strtoul(thstr, NULL, 16);
  78.135  
  78.136      Py_BEGIN_ALLOW_THREADS
  78.137      result = xs_transaction_end(xh, th, abort);
  78.138 @@ -727,7 +727,7 @@ static void remove_watch(XsHandle *self,
  78.139   */
  78.140  static int parse_transaction_path(XsHandle *self, PyObject *args,
  78.141                                    struct xs_handle **xh,
  78.142 -                                  struct xs_transaction_handle **th,
  78.143 +                                  xs_transaction_t *th,
  78.144                                    char **path)
  78.145  {
  78.146      char *thstr;
  78.147 @@ -740,7 +740,7 @@ static int parse_transaction_path(XsHand
  78.148      if (!PyArg_ParseTuple(args, "ss", &thstr, path))
  78.149          return 0;
  78.150  
  78.151 -    *th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
  78.152 +    *th = strtoul(thstr, NULL, 16);
  78.153  
  78.154      return 1;
  78.155  }
    79.1 --- a/tools/python/xen/xend/XendDomain.py	Fri Jan 13 10:38:44 2006 -0600
    79.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Jan 13 14:12:24 2006 -0600
    79.3 @@ -443,7 +443,7 @@ class XendDomain:
    79.4          cpumap = map(lambda x: int(x),
    79.5                       cpumap.replace("[", "").replace("]", "").split(","))
    79.6          try:
    79.7 -            return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
    79.8 +            return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
    79.9          except Exception, ex:
   79.10              raise XendError(str(ex))
   79.11  
    80.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Jan 13 10:38:44 2006 -0600
    80.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Jan 13 14:12:24 2006 -0600
    80.3 @@ -1179,7 +1179,7 @@ class XendDomainInfo:
    80.4                  for v in range(0, self.info['max_vcpu_id']+1):
    80.5                      # vcpu_setaffinity takes a list of ints
    80.6                      cpu = [ int( cpus[v % len(cpus)] ) ]
    80.7 -                    xc.domain_pincpu(self.domid, v, cpu)
    80.8 +                    xc.vcpu_setaffinity(self.domid, v, cpu)
    80.9  
   80.10              m = self.image.getDomainMemory(self.info['memory'] * 1024)
   80.11              balloon.free(m)
    81.1 --- a/tools/python/xen/xend/image.py	Fri Jan 13 10:38:44 2006 -0600
    81.2 +++ b/tools/python/xen/xend/image.py	Fri Jan 13 14:12:24 2006 -0600
    81.3 @@ -209,13 +209,9 @@ class VmxImageHandler(ImageHandler):
    81.4  
    81.5          self.dmargs += self.configVNC(imageConfig)
    81.6  
    81.7 -        self.lapic = 0
    81.8 -        lapic = sxp.child_value(imageConfig, 'lapic')
    81.9 -        if not lapic is None:
   81.10 -            self.lapic = int(lapic)
   81.11 +        self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
   81.12 +        self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
   81.13  
   81.14 -        self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
   81.15 -        
   81.16      def buildDomain(self):
   81.17          # Create an event channel
   81.18          self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(),
   81.19 @@ -229,18 +225,18 @@ class VmxImageHandler(ImageHandler):
   81.20          log.debug("control_evtchn = %d", self.device_channel)
   81.21          log.debug("store_evtchn   = %d", store_evtchn)
   81.22          log.debug("memsize        = %d", self.vm.getMemoryTarget() / 1024)
   81.23 -        log.debug("lapic          = %d", self.lapic)
   81.24          log.debug("vcpus          = %d", self.vm.getVCpuCount())
   81.25          log.debug("acpi           = %d", self.acpi)
   81.26 +        log.debug("apic           = %d", self.apic)
   81.27  
   81.28          return xc.vmx_build(dom            = self.vm.getDomid(),
   81.29                              image          = self.kernel,
   81.30                              control_evtchn = self.device_channel,
   81.31                              store_evtchn   = store_evtchn,
   81.32                              memsize        = self.vm.getMemoryTarget() / 1024,
   81.33 -                            lapic          = self.lapic,
   81.34 +                            vcpus          = self.vm.getVCpuCount(),
   81.35                              acpi           = self.acpi,
   81.36 -                            vcpus          = self.vm.getVCpuCount())
   81.37 +                            apic           = self.apic)
   81.38  
   81.39      # Return a list of cmd line args to the device models based on the
   81.40      # xm config file
    82.1 --- a/tools/python/xen/xend/server/DevController.py	Fri Jan 13 10:38:44 2006 -0600
    82.2 +++ b/tools/python/xen/xend/server/DevController.py	Fri Jan 13 14:12:24 2006 -0600
    82.3 @@ -32,11 +32,10 @@ HOTPLUG_STATUS_ERROR = "error"
    82.4  HOTPLUG_STATUS_BUSY  = "busy"
    82.5  
    82.6  Connected = 1
    82.7 -Died      = 2
    82.8 -Error     = 3
    82.9 -Missing   = 4
   82.10 -Timeout   = 5
   82.11 -Busy      = 6
   82.12 +Error     = 2
   82.13 +Missing   = 3
   82.14 +Timeout   = 4
   82.15 +Busy      = 5
   82.16  
   82.17  xenbusState = {
   82.18      'Unknown'      : 0,
   82.19 @@ -157,11 +156,6 @@ class DevController:
   82.20              raise VmError("Device %s (%s) could not be connected. "
   82.21                            "Device not found." % (devid, self.deviceClass))
   82.22  
   82.23 -        elif status == Died:
   82.24 -            self.destroyDevice(devid)
   82.25 -            raise VmError("Device %s (%s) could not be connected. "
   82.26 -                          "Device has died." % (devid, self.deviceClass))
   82.27 -
   82.28          elif status == Busy:
   82.29              err = None
   82.30              frontpath = self.frontendPath(devid)
   82.31 @@ -408,20 +402,17 @@ class DevController:
   82.32  def hotplugStatusCallback(statusPath, ev, result):
   82.33      log.debug("hotplugStatusCallback %s.", statusPath)
   82.34  
   82.35 -    try:
   82.36 -        status = xstransact.Read(statusPath)
   82.37 +    status = xstransact.Read(statusPath)
   82.38  
   82.39 -        if status is not None:
   82.40 -            if status == HOTPLUG_STATUS_ERROR:
   82.41 -                result['status'] = Error
   82.42 -            elif status == HOTPLUG_STATUS_BUSY:
   82.43 -                result['status'] = Busy
   82.44 -            else:
   82.45 -                result['status'] = Connected
   82.46 +    if status is not None:
   82.47 +        if status == HOTPLUG_STATUS_ERROR:
   82.48 +            result['status'] = Error
   82.49 +        elif status == HOTPLUG_STATUS_BUSY:
   82.50 +            result['status'] = Busy
   82.51          else:
   82.52 -            return 1
   82.53 -    except VmError:
   82.54 -        result['status'] = Died
   82.55 +            result['status'] = Connected
   82.56 +    else:
   82.57 +        return 1
   82.58  
   82.59      log.debug("hotplugStatusCallback %d.", result['status'])
   82.60  
    83.1 --- a/tools/python/xen/xm/create.py	Fri Jan 13 10:38:44 2006 -0600
    83.2 +++ b/tools/python/xen/xm/create.py	Fri Jan 13 14:12:24 2006 -0600
    83.3 @@ -160,14 +160,14 @@ gopts.var('cpus', val='CPUS',
    83.4            fn=set_int, default=None,
    83.5            use="CPUS to run the domain on.")
    83.6  
    83.7 -gopts.var('lapic', val='LAPIC',
    83.8 -          fn=set_int, default=0,
    83.9 -          use="Disable or enable local APIC of VMX domain.")
   83.10 -
   83.11  gopts.var('acpi', val='ACPI',
   83.12            fn=set_int, default=0,
   83.13            use="Disable or enable ACPI of VMX domain.")
   83.14  
   83.15 +gopts.var('apic', val='APIC',
   83.16 +          fn=set_int, default=0,
   83.17 +          use="Disable or enable APIC of VMX domain.")
   83.18 +
   83.19  gopts.var('vcpus', val='VCPUS',
   83.20            fn=set_int, default=1,
   83.21            use="# of Virtual CPUS in domain.")
   83.22 @@ -534,8 +534,8 @@ def configure_vmx(config_image, vals):
   83.23      """
   83.24      args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
   83.25               'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
   83.26 -             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic',
   83.27 -             'xauthority', 'acpi' ]
   83.28 +             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
   83.29 +             'xauthority' ]
   83.30      for a in args:
   83.31          if (vals.__dict__[a]):
   83.32              config_image.append([a, vals.__dict__[a]])
    84.1 --- a/tools/python/xen/xm/main.py	Fri Jan 13 10:38:44 2006 -0600
    84.2 +++ b/tools/python/xen/xm/main.py	Fri Jan 13 14:12:24 2006 -0600
    84.3 @@ -390,7 +390,6 @@ def xm_brief_list(doms):
    84.4  
    84.5  
    84.6  def xm_vcpu_list(args):
    84.7 -    print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
    84.8  
    84.9      from xen.xend.XendClient import server
   84.10      if args:
   84.11 @@ -401,6 +400,8 @@ def xm_vcpu_list(args):
   84.12              lambda x: server.xend_domain_vcpuinfo(sxp.child_value(x, 'name')),
   84.13              doms)
   84.14  
   84.15 +    print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
   84.16 +
   84.17      for dom in dominfo:
   84.18          def get_info(n):
   84.19              return sxp.child_value(dom, n)
   84.20 @@ -625,6 +626,8 @@ def xm_sched_sedf(args):
   84.21      server.xend_domain_cpu_sedf_set(dom, *v)
   84.22  
   84.23  def xm_info(args):
   84.24 +    arg_check(args, "info", 0)
   84.25 +
   84.26      from xen.xend.XendClient import server
   84.27      info = server.xend_node()
   84.28      
   84.29 @@ -645,9 +648,12 @@ def xm_console(args):
   84.30  
   84.31  
   84.32  def xm_top(args):
   84.33 +    arg_check(args, "top", 0)
   84.34 +
   84.35      os.execvp('xentop', ['xentop'])
   84.36  
   84.37  def xm_dmesg(args):
   84.38 +    arg_check(args, "dmesg", 0)
   84.39      
   84.40      gopts = Opts(use="""[-c|--clear]
   84.41  
    85.1 --- a/tools/tests/test_x86_emulator.c	Fri Jan 13 10:38:44 2006 -0600
    85.2 +++ b/tools/tests/test_x86_emulator.c	Fri Jan 13 14:12:24 2006 -0600
    85.3 @@ -92,7 +92,7 @@ int main(int argc, char **argv)
    85.4      regs.ecx    = 0x12345678;
    85.5      cr2         = (unsigned long)&res;
    85.6      res         = 0x7FFFFFFF;
    85.7 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
    85.8 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
    85.9      if ( (rc != 0) || 
   85.10           (res != 0x92345677) || 
   85.11           (regs.eflags != 0xa94) ||
   85.12 @@ -110,7 +110,7 @@ int main(int argc, char **argv)
   85.13      regs.ecx    = 0x12345678UL;
   85.14  #endif
   85.15      cr2         = (unsigned long)&res;
   85.16 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
   85.17 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   85.18      if ( (rc != 0) || 
   85.19           (res != 0x92345677) || 
   85.20           (regs.ecx != 0x8000000FUL) ||
   85.21 @@ -125,7 +125,7 @@ int main(int argc, char **argv)
   85.22      regs.eax    = 0x92345677UL;
   85.23      regs.ecx    = 0xAA;
   85.24      cr2         = (unsigned long)&res;
   85.25 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.26 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.27      if ( (rc != 0) || 
   85.28           (res != 0x923456AA) || 
   85.29           (regs.eflags != 0x244) ||
   85.30 @@ -141,7 +141,7 @@ int main(int argc, char **argv)
   85.31      regs.eax    = 0xAABBCC77UL;
   85.32      regs.ecx    = 0xFF;
   85.33      cr2         = (unsigned long)&res;
   85.34 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.35 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.36      if ( (rc != 0) || 
   85.37           (res != 0x923456AA) || 
   85.38           ((regs.eflags&0x240) != 0x200) ||
   85.39 @@ -157,7 +157,7 @@ int main(int argc, char **argv)
   85.40      regs.eip    = (unsigned long)&instr[0];
   85.41      regs.ecx    = 0x12345678;
   85.42      cr2         = (unsigned long)&res;
   85.43 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.44 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.45      if ( (rc != 0) || 
   85.46           (res != 0x12345678) || 
   85.47           (regs.eflags != 0x200) ||
   85.48 @@ -174,7 +174,7 @@ int main(int argc, char **argv)
   85.49      regs.eax    = 0x923456AAUL;
   85.50      regs.ecx    = 0xDDEEFF00L;
   85.51      cr2         = (unsigned long)&res;
   85.52 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.53 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.54      if ( (rc != 0) || 
   85.55           (res != 0xDDEEFF00) || 
   85.56           (regs.eflags != 0x244) ||
   85.57 @@ -193,7 +193,7 @@ int main(int argc, char **argv)
   85.58      regs.edi    = (unsigned long)&res + 2;
   85.59      regs.error_code = 0; /* read fault */
   85.60      cr2         = regs.esi;
   85.61 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.62 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.63      if ( (rc != 0) || 
   85.64           (res != 0x44554455) ||
   85.65           (regs.eflags != 0x200) ||
   85.66 @@ -211,7 +211,7 @@ int main(int argc, char **argv)
   85.67      regs.eip    = (unsigned long)&instr[0];
   85.68      regs.edi    = (unsigned long)&res;
   85.69      cr2         = regs.edi;
   85.70 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
   85.71 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   85.72      if ( (rc != 0) || 
   85.73           (res != 0x2233445D) ||
   85.74           ((regs.eflags&0x201) != 0x201) ||
   85.75 @@ -229,7 +229,7 @@ int main(int argc, char **argv)
   85.76      regs.eip    = (unsigned long)&instr[0];
   85.77      regs.edi    = (unsigned long)cmpxchg8b_res;
   85.78      cr2         = regs.edi;
   85.79 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
   85.80 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   85.81      if ( (rc != 0) || 
   85.82           (cmpxchg8b_res[0] != 0x9999AAAA) ||
   85.83           (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
   85.84 @@ -243,7 +243,7 @@ int main(int argc, char **argv)
   85.85      regs.eip    = (unsigned long)&instr[0];
   85.86      regs.edi    = (unsigned long)cmpxchg8b_res;
   85.87      cr2         = regs.edi;
   85.88 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
   85.89 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   85.90      if ( (rc != 0) || 
   85.91           (cmpxchg8b_res[0] != 0x9999AAAA) ||
   85.92           (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
   85.93 @@ -260,7 +260,7 @@ int main(int argc, char **argv)
   85.94      regs.ecx    = 0x12345678;
   85.95      cr2         = (unsigned long)&res;
   85.96      res         = 0x82;
   85.97 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
   85.98 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   85.99      if ( (rc != 0) ||
  85.100           (res != 0x82) ||
  85.101           (regs.ecx != 0xFFFFFF82) ||
  85.102 @@ -275,7 +275,7 @@ int main(int argc, char **argv)
  85.103      regs.ecx    = 0x12345678;
  85.104      cr2         = (unsigned long)&res;
  85.105      res         = 0x1234aa82;
  85.106 -    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
  85.107 +    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
  85.108      if ( (rc != 0) ||
  85.109           (res != 0x1234aa82) ||
  85.110           (regs.ecx != 0xaa82) ||
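
The literal 4 was correct only by coincidence of the mode encoding; the named
constant documents the intent.  For these hunks to be behavior-preserving,
X86EMUL_MODE_PROT32 must equal 4, matching the shape the emulator header is
understood to have (values below are inferred from the old magic number, not
quoted from x86_emulate.h):

    /* Inferred execution-mode constants for x86_emulate_memop(). */
    #define X86EMUL_MODE_REAL    0   /* real mode              */
    #define X86EMUL_MODE_PROT16  2   /* 16-bit protected mode  */
    #define X86EMUL_MODE_PROT32  4   /* 32-bit protected mode  */
    #define X86EMUL_MODE_PROT64  8   /* 64-bit (long) mode     */
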
    86.1 --- a/tools/vtpm/Makefile	Fri Jan 13 10:38:44 2006 -0600
    86.2 +++ b/tools/vtpm/Makefile	Fri Jan 13 14:12:24 2006 -0600
    86.3 @@ -11,6 +11,8 @@ VTPM_DIR = vtpm
    86.4  # Emulator tarball name
    86.5  TPM_EMULATOR_TARFILE = tpm_emulator-0.2b.tar.gz
    86.6  
    86.7 +GMP_HEADER = /usr/include/gmp.h
    86.8 +
    86.9  all: build
   86.10  
   86.11  build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub
   86.12 @@ -55,5 +57,12 @@ mrproper:
   86.13  	patch -p1 <../vtpm.patch
   86.14  
   86.15  build_sub:
   86.16 -	$(MAKE) -C $(TPM_EMULATOR_DIR)
   86.17 -	$(MAKE) -C $(VTPM_DIR)
   86.18 +	if [ -e $(GMP_HEADER) ]; then \
   86.19 +		$(MAKE) -C $(VTPM_DIR); \
   86.20 +		if [ "$(BUILD_EMULATOR)" = "y" ]; then \
   86.21 +			$(MAKE) -C $(TPM_EMULATOR_DIR); \
    86.22 +		fi; \
   86.23 +	else \
   86.24 +		echo "*** Unable to build VTPMs. libgmp could not be found."; \
   86.25 +	fi
   86.26 +
    87.1 --- a/tools/vtpm/Rules.mk	Fri Jan 13 10:38:44 2006 -0600
    87.2 +++ b/tools/vtpm/Rules.mk	Fri Jan 13 14:12:24 2006 -0600
    87.3 @@ -33,5 +33,7 @@ OBJS	= $(patsubst %.c,%.o,$(SRCS))
    87.4  
    87.5  -include $(DEP_FILES)
    87.6  
    87.7 +BUILD_EMULATOR = n
    87.8 +
    87.9  # Make sure these are just rules
   87.10  .PHONY : all build install clean
    88.1 --- a/tools/vtpm_manager/Makefile	Fri Jan 13 10:38:44 2006 -0600
    88.2 +++ b/tools/vtpm_manager/Makefile	Fri Jan 13 14:12:24 2006 -0600
    88.3 @@ -4,13 +4,18 @@ XEN_ROOT = ../..
    88.4  include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk
    88.5  
    88.6  SUBDIRS		= crypto tcs util manager
    88.7 +OPENSSL_HEADER	= /usr/include/openssl/crypto.h
    88.8  
    88.9  all: build
   88.10  
   88.11  build:
   88.12 -	@set -e; for subdir in $(SUBDIRS); do \
   88.13 -		$(MAKE) -C $$subdir $@; \
   88.14 -	done
   88.15 +	if [ -e $(OPENSSL_HEADER) ]; then \
    88.16 +		set -e; for subdir in $(SUBDIRS); do \
   88.17 +			$(MAKE) -C $$subdir $@; \
   88.18 +		done; \
   88.19 +	else \
    88.20 +		echo "*** Cannot build vtpm_manager: OpenSSL development files missing."; \
   88.21 +	fi
   88.22  
   88.23  install: build
   88.24  	@set -e; for subdir in $(SUBDIRS); do \
    89.1 --- a/tools/vtpm_manager/manager/vtsp.c	Fri Jan 13 10:38:44 2006 -0600
    89.2 +++ b/tools/vtpm_manager/manager/vtsp.c	Fri Jan 13 14:12:24 2006 -0600
    89.3 @@ -144,7 +144,10 @@ TPM_RESULT VerifyAuth( /*[IN]*/ const BY
    89.4    if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0)  // 0 indicates equality
    89.5      return (TPM_SUCCESS);
    89.6    else {
    89.7 -    VTSP_OIAP( hContext, auth);
    89.8 +    // If a context was supplied, reconnect the OIAP session.
    89.9 +    // NOTE: This only works for TCSes that never have a 0 context.
   89.10 +    if (hContext) 
   89.11 +      VTSP_OIAP( hContext, auth);
   89.12      return (TPM_AUTHFAIL);
   89.13    }
   89.14  }
   89.15 @@ -157,6 +160,10 @@ TPM_RESULT VTSP_OIAP(const TCS_CONTEXT_H
   89.16    TPMTRYRETURN( TCSP_OIAP(hContext,
   89.17  			  &auth->AuthHandle,
   89.18  			  &auth->NonceEven) );
   89.19 +
   89.20 +  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
   89.21 +  auth->fContinueAuthSession = FALSE;
   89.22 +
   89.23    goto egress;
   89.24    
   89.25   abort_egress:
   89.26 @@ -195,6 +202,9 @@ TPM_RESULT VTSP_OSAP(const TCS_CONTEXT_H
   89.27  		 BSG_TPM_NONCE, &nonceOddOSAP);
   89.28    
   89.29    Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, TPM_DIGEST_SIZE, (BYTE *) sharedSecret);       
   89.30 +
   89.31 +  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
   89.32 +  auth->fContinueAuthSession = FALSE;
   89.33      
   89.34    goto egress;
   89.35    
   89.36 @@ -288,9 +298,6 @@ TPM_RESULT VTSP_TakeOwnership(   const T
   89.37    
   89.38    struct pack_buf_t srkText;
   89.39    
   89.40 -  // GenerateAuth new nonceOdd    
   89.41 -  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   89.42 -  
   89.43    //These values are accurate for an enc(AuthData).
   89.44    struct pack_buf_t encOwnerAuth, encSrkAuth;
   89.45    
   89.46 @@ -383,9 +390,6 @@ TPM_RESULT VTSP_DisablePubekRead( const 
   89.47    BYTE *paramText;        // Digest to make Auth.
   89.48    UINT32 paramTextSize;
   89.49      
   89.50 -  // Generate HMAC   
   89.51 -  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   89.52 -  
   89.53    paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   89.54    
   89.55    paramTextSize = BSG_PackList(paramText, 1,
   89.56 @@ -504,9 +508,6 @@ TPM_RESULT VTSP_CreateWrapKey(  const TC
   89.57    newKeyText.data = flatKey;
   89.58    newKeyText.size = flatKeySize;
   89.59    
   89.60 -  // GenerateAuth new nonceOdd    
   89.61 -  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   89.62 -  
   89.63    // Generate HMAC
   89.64    paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   89.65    
   89.66 @@ -587,9 +588,6 @@ TPM_RESULT VTSP_LoadKey(const TCS_CONTEX
   89.67      // Generate Extra TCS Parameters
   89.68      TPM_HANDLE phKeyHMAC;
   89.69    
   89.70 -    // Generate HMAC
   89.71 -    Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   89.72 -  
   89.73      paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
   89.74    
   89.75      paramTextSize = BSG_PackList(paramText, 1,
   89.76 @@ -676,9 +674,6 @@ TPM_RESULT VTSP_Unbind( const TCS_CONTEX
   89.77    BYTE *clear_data_text;
   89.78    UINT32 clear_data_size;
   89.79    
   89.80 -  // Generate HMAC   
   89.81 -  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   89.82 -  
   89.83    struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes};
   89.84    
   89.85    paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   89.86 @@ -787,6 +782,196 @@ TPM_RESULT VTSP_Bind(   CRYPTO_INFO *cry
   89.87    return TPM_SUCCESS;
   89.88  }
   89.89  
   89.90 +TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
   89.91 +                     const TPM_KEY_HANDLE        keyHandle,
   89.92 +                     const TPM_AUTHDATA          *sealDataAuth,
   89.93 +                     const TPM_PCR_COMPOSITE     *pcrComp,
   89.94 +                     const buffer_t              *inData,
   89.95 +                     TPM_STORED_DATA             *sealedData,                                   
   89.96 +                     const TPM_SECRET            *osapSharedSecret,
   89.97 +                     TCS_AUTH                    *auth) {
   89.98 +
   89.99 +  TPM_RESULT status = TPM_SUCCESS;
  89.100 +  TPM_COMMAND_CODE command = TPM_ORD_Seal;
  89.101 +
  89.102 +  BYTE *paramText;        // Digest to make Auth.
  89.103 +  UINT32 paramTextSize;
  89.104 +
  89.105 +  // Generate PCR_Info Struct from Comp
  89.106 +  TPM_PCR_INFO pcrInfo;
  89.107 +  UINT32 pcrInfoSize, flatpcrSize;
  89.108 +  BYTE flatpcr[3 +                          // PCR_Select = 3 1 byte banks
  89.109 +               sizeof(UINT16) +             //              2 byte UINT16
  89.110 +               sizeof(UINT32) +             // PCR_Comp   = 4 byte UINT32
  89.111 +               24 * sizeof(TPM_PCRVALUE) ]; //              up to 24 PCRs
  89.112 +
  89.113 +  if (pcrComp != NULL) {
  89.114 +      //printf("\n\tBinding to PCRs: ");
  89.115 +      //for(int i = 0 ; i < pcrComp->select.sizeOfSelect ; i++)
  89.116 +      //printf("%2.2x", pcrComp->select.pcrSelect[i]);
  89.117 +
  89.118 +      memcpy(&pcrInfo.pcrSelection, &pcrComp->select, sizeof(TPM_PCR_SELECTION));
  89.119 +
  89.120 +      flatpcrSize = BSG_Pack(BSG_TPM_PCR_COMPOSITE, (BYTE *) pcrComp, flatpcr);
  89.121 +      Crypto_SHA1Full((BYTE *) flatpcr, flatpcrSize, (BYTE *) &(pcrInfo.digestAtRelease));
  89.122 +      memset(&(pcrInfo.digestAtCreation), 0, sizeof(TPM_DIGEST));
  89.123 +      pcrInfoSize = BSG_Pack(BSG_TPM_PCR_INFO, (BYTE *) &pcrInfo, flatpcr);
  89.124 +  } else {
  89.125 +      //printf("\n\tBinding to no PCRS.");
  89.126 +      pcrInfoSize = 0;
  89.127 +  }
  89.128 +
  89.129 +  // Calculate encUsageAuth
  89.130 +  BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)];
  89.131 +  UINT32 XORbufferSize = sizeof(XORbuffer);
  89.132 +  TPM_DIGEST XORKey;
  89.133 +  TPM_ENCAUTH encAuth;
  89.134 +
  89.135 +  BSG_PackList( XORbuffer, 2,
  89.136 +                BSG_TPM_SECRET, osapSharedSecret,
  89.137 +                BSG_TPM_NONCE, &auth->NonceEven );
  89.138 +
  89.139 +  Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey);
  89.140 +
  89.141 +  int i;
  89.142 +  for (i=0; i < TPM_DIGEST_SIZE; i++)
  89.143 +    ((BYTE *) &encAuth)[i] = ((BYTE *) &XORKey)[i] ^ ((BYTE *) sealDataAuth)[i];
  89.144 +
  89.145 +  // Generate Extra TCS Parameters
  89.146 +  UINT32 inDataSize = buffer_len(inData);
  89.147 +  struct pack_buf_t inData_pack = {inDataSize, inData->bytes};
  89.148 +  struct pack_buf_t pcrInfo_pack = {pcrInfoSize, flatpcr};
  89.149 +
  89.150 +  UINT32 sealedDataSize;
  89.151 +  BYTE *flatSealedData=NULL;
  89.152 +
  89.153 +  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
  89.154 +
  89.155 +  paramTextSize = BSG_PackList(paramText, 4,
  89.156 +                               BSG_TPM_COMMAND_CODE, &command,
  89.157 +                               BSG_TPM_ENCAUTH, &encAuth,
  89.158 +                               BSG_TPM_SIZE32_DATA, &pcrInfo_pack,
  89.159 +                               BSG_TPM_SIZE32_DATA, &inData_pack);
  89.160 +
  89.161 +  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
  89.162 +                              osapSharedSecret, auth) );
  89.163 +
  89.164 +  // Call TCS
  89.165 +  TPMTRYRETURN( TCSP_Seal( hContext,
  89.166 +                           keyHandle,
  89.167 +                           encAuth,
  89.168 +                           pcrInfoSize,
  89.169 +                           flatpcr,
  89.170 +                           inDataSize,
  89.171 +                           inData->bytes,
  89.172 +                           auth,
  89.173 +                           &sealedDataSize,
  89.174 +                           &flatSealedData) );
  89.175 +
  89.176 +  // Unpack/return key structure
  89.177 +  BSG_Unpack( BSG_TPM_STORED_DATA, flatSealedData, sealedData );
  89.178 +
  89.179 +  paramTextSize = BSG_PackList(paramText, 3,
  89.180 +                               BSG_TPM_RESULT, &status,
  89.181 +                               BSG_TPM_COMMAND_CODE, &command,
  89.182 +                               BSG_TPM_STORED_DATA, sealedData);
  89.183 +
  89.184 +  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
  89.185 +                            osapSharedSecret, auth,
  89.186 +                            0) );
  89.187 +
  89.188 +
  89.189 +  goto egress;
  89.190 +
  89.191 + abort_egress:
  89.192 + egress:
  89.193 +
  89.194 +  if (flatSealedData)
  89.195 +    TCS_FreeMemory( hContext, flatSealedData);
  89.196 +
  89.197 +  free(paramText);
  89.198 +  return status;
  89.199 +}
  89.200 +
  89.201 +
  89.202 +TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
  89.203 +                       const TPM_KEY_HANDLE        keyHandle,
  89.204 +                       const TPM_STORED_DATA       *sealedData,
  89.205 +                       const TPM_AUTHDATA          *key_usage_auth,
  89.206 +                       const TPM_AUTHDATA          *data_usage_auth,
  89.207 +                       buffer_t                    *outData,
  89.208 +                       TCS_AUTH                    *auth,
  89.209 +                       TCS_AUTH                    *dataAuth) {
  89.210 +
  89.211 +  TPM_RESULT status = TPM_SUCCESS;
  89.212 +  TPM_COMMAND_CODE command = TPM_ORD_Unseal;
  89.213 +
  89.214 +  BYTE *paramText;        // Digest to make Auth.
  89.215 +  UINT32 paramTextSize;
  89.216 +
  89.217 +  // Generate Extra TCS Parameters
  89.218 +  UINT32 sealDataSize, clearDataSize;
  89.219 +  BYTE *flatSealedData= (BYTE *) malloc(sizeof(TPM_VERSION) +
  89.220 +                                        2 * sizeof(UINT32) +
  89.221 +                                        sealedData->sealInfoSize +
  89.222 +                                        sealedData->encDataSize),
  89.223 +       *clearData=NULL;
  89.224 +
  89.225 +  sealDataSize = BSG_Pack(BSG_TPM_STORED_DATA, sealedData, flatSealedData );
  89.226 +
  89.227 +  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
  89.228 +
  89.229 +  paramTextSize = BSG_PackList(paramText, 2,
  89.230 +                               BSG_TPM_COMMAND_CODE, &command,
  89.231 +                               BSG_TPM_STORED_DATA, sealedData);
  89.232 +
  89.233 +  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
  89.234 +                              key_usage_auth, auth) );
  89.235 +
  89.236 +  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
  89.237 +                              data_usage_auth, dataAuth) );
  89.238 +  // Call TCS
  89.239 +  TPMTRYRETURN( TCSP_Unseal(  hContext,
  89.240 +                              keyHandle,
  89.241 +                              sealDataSize,
  89.242 +                              flatSealedData,
  89.243 +                              auth,
  89.244 +                              dataAuth,
  89.245 +                              &clearDataSize,
  89.246 +                              &clearData) );
  89.247 +
  89.248 +  // Verify Auth
  89.249 +  struct pack_buf_t clearData_pack = {clearDataSize, clearData};
  89.250 +
  89.251 +  paramTextSize = BSG_PackList(paramText, 3,
  89.252 +                               BSG_TPM_RESULT, &status,
  89.253 +                               BSG_TPM_COMMAND_CODE, &command,
  89.254 +                               BSG_TPM_SIZE32_DATA, &clearData_pack);
  89.255 +
  89.256 +  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
  89.257 +                            key_usage_auth, auth,
  89.258 +                            hContext) );
  89.259 +
  89.260 +  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
  89.261 +                            data_usage_auth, dataAuth,
  89.262 +                            hContext) );
  89.263 +
  89.264 +  // Unpack/return key structure
  89.265 +  TPMTRYRETURN( buffer_init(outData, clearDataSize, clearData) );
  89.266 +
  89.267 +  goto egress;
  89.268 +
  89.269 + abort_egress:
  89.270 + egress:
  89.271 +
   89.272 +  if (clearData)
  89.273 +    TCS_FreeMemory( hContext, clearData);
  89.274 +
  89.275 +  free(paramText);
  89.276 +  return status;
  89.277 +}
  89.278 +
  89.279 +
  89.280  // Function Reaches into unsupported TCS command, beware.
  89.281  TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE    hContext,
  89.282                              const buffer_t *inbuf,
    90.1 --- a/tools/vtpm_manager/manager/vtsp.h	Fri Jan 13 10:38:44 2006 -0600
    90.2 +++ b/tools/vtpm_manager/manager/vtsp.h	Fri Jan 13 14:12:24 2006 -0600
    90.3 @@ -100,4 +100,22 @@ TPM_RESULT VTSP_Bind(   CRYPTO_INFO *cry
    90.4              const buffer_t *inData, 
    90.5              buffer_t *outData);
    90.6                          
    90.7 +TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
    90.8 +                     const TPM_KEY_HANDLE        keyHandle,
    90.9 +                     const TPM_AUTHDATA          *sealDataAuth,
   90.10 +                     const TPM_PCR_COMPOSITE     *pcrComp,
   90.11 +                     const buffer_t              *inData,
   90.12 +                     TPM_STORED_DATA             *sealedData,                                   
   90.13 +                     const TPM_SECRET            *osapSharedSecret,
   90.14 +                     TCS_AUTH                    *auth);
   90.15 +
   90.16 +TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
   90.17 +                       const TPM_KEY_HANDLE        keyHandle,
   90.18 +                       const TPM_STORED_DATA       *sealedData,
   90.19 +                       const TPM_AUTHDATA          *key_usage_auth,
   90.20 +                       const TPM_AUTHDATA          *data_usage_auth,
   90.21 +                       buffer_t                    *outData,
   90.22 +                       TCS_AUTH                    *auth,
   90.23 +                       TCS_AUTH                    *dataAuth);
   90.24 +
   90.25  #endif //_VTSP_H_
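
The two new entry points pair up as a round trip: seal data under a storage
key, optionally bound to PCRs, then recover it later.  A call-level sketch
(not buildable outside the vtpm_manager tree; the key handle hSrk, the auth
variables, and the session setup via VTSP_OSAP/VTSP_OIAP are assumed to be
established as in the existing manager code, and pcrComp is left NULL to skip
PCR binding):

    /* Sketch only: session setup and error paths elided. */
    TPM_STORED_DATA sealed;
    buffer_t        clear_out;

    /* Seal under one OSAP session; its shared secret encrypts the data auth. */
    TPMTRYRETURN( VTSP_Seal(hContext, hSrk, &data_usage_auth,
                            NULL /* no PCR binding */, &inData,
                            &sealed, &osapSharedSecret, &osap_auth) );

    /* Unseal with two OIAP sessions: one for the key, one for the data. */
    TPMTRYRETURN( VTSP_Unseal(hContext, hSrk, &sealed,
                              &srk_usage_auth, &data_usage_auth,
                              &clear_out, &key_auth, &data_auth) );
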
    91.1 --- a/tools/vtpm_manager/tcs/tcs.c	Fri Jan 13 10:38:44 2006 -0600
    91.2 +++ b/tools/vtpm_manager/tcs/tcs.c	Fri Jan 13 14:12:24 2006 -0600
    91.3 @@ -636,7 +636,7 @@ TPM_RESULT TCSP_Seal(   TCS_CONTEXT_HAND
    91.4    TDDL_UINT32  OutLength = TCPA_MAX_BUFFER_LENGTH;
    91.5    
    91.6    // check input params
    91.7 -  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || *SealedData == NULL)
    91.8 +  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || SealedData == NULL)
    91.9      return TPM_BAD_PARAMETER;
   91.10    
   91.11    // Convert Byte Input parameter in the input byte stream InBuf
    92.1 --- a/tools/xcutils/xc_save.c	Fri Jan 13 10:38:44 2006 -0600
    92.2 +++ b/tools/xcutils/xc_save.c	Fri Jan 13 14:12:24 2006 -0600
    92.3 @@ -10,10 +10,28 @@
    92.4  #include <err.h>
    92.5  #include <stdlib.h>
    92.6  #include <stdint.h>
    92.7 +#include <string.h>
    92.8  #include <stdio.h>
    92.9  
   92.10  #include <xenguest.h>
   92.11  
   92.12 +
   92.13 +/**
   92.14 + * Issue a suspend request through stdout, and receive the acknowledgement
   92.15 + * from stdin.  This is handled by XendCheckpoint in the Python layer.
   92.16 + */
   92.17 +static int suspend(int domid)
   92.18 +{
   92.19 +    char ans[30];
   92.20 +
   92.21 +    printf("suspend\n");
   92.22 +    fflush(stdout);
   92.23 +
   92.24 +    return (fgets(ans, sizeof(ans), stdin) != NULL &&
   92.25 +            !strncmp(ans, "done\n", 5));
   92.26 +}
   92.27 +
   92.28 +
   92.29  int
   92.30  main(int argc, char **argv)
   92.31  {
   92.32 @@ -29,5 +47,5 @@ main(int argc, char **argv)
   92.33      max_f = atoi(argv[5]);
   92.34      flags = atoi(argv[6]);
   92.35  
   92.36 -    return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags);
   92.37 +    return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
   92.38  }
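
xc_save now drives suspension through a tiny line-oriented protocol on its
standard streams; the answering side is XendCheckpoint in the Python layer.
As a protocol illustration only, a hypothetical controller serving one request
looks like this:

    /* Hypothetical controller half of the suspend handshake. */
    static void serve_suspend_request(FILE *from_save, FILE *to_save)
    {
        char line[30];

        /* xc_save prints "suspend\n" when the domain must be suspended. */
        if (fgets(line, sizeof(line), from_save) &&
            strcmp(line, "suspend\n") == 0) {
            /* ... suspend the domain here; xend does so via xenstore ... */
            fputs("done\n", to_save);   /* matches suspend()'s strncmp */
            fflush(to_save);
        }
    }
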
    93.1 --- a/tools/xenmon/Makefile	Fri Jan 13 10:38:44 2006 -0600
    93.2 +++ b/tools/xenmon/Makefile	Fri Jan 13 14:12:24 2006 -0600
    93.3 @@ -13,12 +13,9 @@
    93.4  INSTALL         = install
    93.5  INSTALL_PROG    = $(INSTALL) -m0755
    93.6  INSTALL_DIR     = $(INSTALL) -d -m0755
    93.7 -INSTALL_DATA    = $(INSTALL) -m064
    93.8 +INSTALL_DATA    = $(INSTALL) -m0644
    93.9  
   93.10 -prefix=/usr/local
   93.11 -mandir=$(prefix)/share/man
   93.12 -man1dir=$(mandir)/man1
   93.13 -sbindir=$(prefix)/sbin
   93.14 +sbindir=/usr/sbin
   93.15  
   93.16  XEN_ROOT=../..
   93.17  include $(XEN_ROOT)/tools/Rules.mk
    94.1 --- a/tools/xenstat/libxenstat/src/xenstat.c	Fri Jan 13 10:38:44 2006 -0600
    94.2 +++ b/tools/xenstat/libxenstat/src/xenstat.c	Fri Jan 13 14:12:24 2006 -0600
    94.3 @@ -705,7 +705,7 @@ static char *xenstat_get_domain_name(xen
    94.4  
    94.5  	snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id);
    94.6  	
    94.7 -	name = xs_read(handle->xshandle, NULL, path, NULL);
    94.8 +	name = xs_read(handle->xshandle, XBT_NULL, path, NULL);
    94.9  	if (name == NULL)
   94.10  		name = strdup(" ");
   94.11  
    95.1 --- a/tools/xenstore/xenstore_client.c	Fri Jan 13 10:38:44 2006 -0600
    95.2 +++ b/tools/xenstore/xenstore_client.c	Fri Jan 13 14:12:24 2006 -0600
    95.3 @@ -66,7 +66,7 @@ usage(const char *progname)
    95.4  
    95.5  #if defined(CLIENT_rm)
    95.6  static int
    95.7 -do_rm(char *path, struct xs_handle *xsh, struct xs_transaction_handle *xth)
    95.8 +do_rm(char *path, struct xs_handle *xsh, xs_transaction_t xth)
    95.9  {
   95.10      if (xs_rm(xsh, xth, path)) {
   95.11          return 0;
   95.12 @@ -81,7 +81,7 @@ do_rm(char *path, struct xs_handle *xsh,
   95.13  
   95.14  static int
   95.15  perform(int optind, int argc, char **argv, struct xs_handle *xsh,
   95.16 -        struct xs_transaction_handle *xth, int prefix, int tidy)
   95.17 +        xs_transaction_t xth, int prefix, int tidy)
   95.18  {
   95.19      while (optind < argc) {
   95.20  #if defined(CLIENT_read)
   95.21 @@ -179,7 +179,7 @@ int
   95.22  main(int argc, char **argv)
   95.23  {
   95.24      struct xs_handle *xsh;
   95.25 -    struct xs_transaction_handle *xth;
   95.26 +    xs_transaction_t xth;
   95.27      int ret = 0, socket = 0;
   95.28      int prefix = 0;
   95.29      int tidy = 0;
   95.30 @@ -243,7 +243,7 @@ main(int argc, char **argv)
   95.31  
   95.32    again:
   95.33      xth = xs_transaction_start(xsh);
   95.34 -    if (xth == NULL)
   95.35 +    if (xth == XBT_NULL)
   95.36  	errx(1, "couldn't start transaction");
   95.37  
   95.38      ret = perform(optind, argc, argv, xsh, xth, prefix, tidy);
    96.1 --- a/tools/xenstore/xs.c	Fri Jan 13 10:38:44 2006 -0600
    96.2 +++ b/tools/xenstore/xs.c	Fri Jan 13 14:12:24 2006 -0600
    96.3 @@ -292,7 +292,7 @@ static void *read_reply(
    96.4  }
    96.5  
    96.6  /* Send message to xs, get malloc'ed reply.  NULL and set errno on error. */
    96.7 -static void *xs_talkv(struct xs_handle *h, struct xs_transaction_handle *t,
    96.8 +static void *xs_talkv(struct xs_handle *h, xs_transaction_t t,
    96.9  		      enum xsd_sockmsg_type type,
   96.10  		      const struct iovec *iovec,
   96.11  		      unsigned int num_vecs,
   96.12 @@ -304,7 +304,7 @@ static void *xs_talkv(struct xs_handle *
   96.13  	unsigned int i;
   96.14  	struct sigaction ignorepipe, oldact;
   96.15  
   96.16 -	msg.tx_id = (uint32_t)(unsigned long)t;
   96.17 +	msg.tx_id = t;
   96.18  	msg.req_id = 0;
   96.19  	msg.type = type;
   96.20  	msg.len = 0;
   96.21 @@ -368,7 +368,7 @@ static void free_no_errno(void *p)
   96.22  }
   96.23  
   96.24  /* Simplified version of xs_talkv: single message. */
   96.25 -static void *xs_single(struct xs_handle *h, struct xs_transaction_handle *t,
   96.26 +static void *xs_single(struct xs_handle *h, xs_transaction_t t,
   96.27  		       enum xsd_sockmsg_type type,
   96.28  		       const char *string,
   96.29  		       unsigned int *len)
   96.30 @@ -388,7 +388,7 @@ static bool xs_bool(char *reply)
   96.31  	return true;
   96.32  }
   96.33  
   96.34 -char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
   96.35 +char **xs_directory(struct xs_handle *h, xs_transaction_t t,
   96.36  		    const char *path, unsigned int *num)
   96.37  {
   96.38  	char *strings, *p, **ret;
   96.39 @@ -420,7 +420,7 @@ char **xs_directory(struct xs_handle *h,
   96.40   * Returns a malloced value: call free() on it after use.
   96.41   * len indicates length in bytes, not including the nul.
   96.42   */
   96.43 -void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
   96.44 +void *xs_read(struct xs_handle *h, xs_transaction_t t,
   96.45  	      const char *path, unsigned int *len)
   96.46  {
   96.47  	return xs_single(h, t, XS_READ, path, len);
   96.48 @@ -429,7 +429,7 @@ void *xs_read(struct xs_handle *h, struc
   96.49  /* Write the value of a single file.
   96.50   * Returns false on failure.
   96.51   */
   96.52 -bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
   96.53 +bool xs_write(struct xs_handle *h, xs_transaction_t t,
   96.54  	      const char *path, const void *data, unsigned int len)
   96.55  {
   96.56  	struct iovec iovec[2];
   96.57 @@ -446,7 +446,7 @@ bool xs_write(struct xs_handle *h, struc
   96.58  /* Create a new directory.
   96.59   * Returns false on failure, or success if it already exists.
   96.60   */
   96.61 -bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
   96.62 +bool xs_mkdir(struct xs_handle *h, xs_transaction_t t,
   96.63  	      const char *path)
   96.64  {
   96.65  	return xs_bool(xs_single(h, t, XS_MKDIR, path, NULL));
   96.66 @@ -455,7 +455,7 @@ bool xs_mkdir(struct xs_handle *h, struc
   96.67  /* Destroy a file or directory (directories must be empty).
   96.68   * Returns false on failure, or success if it doesn't exist.
   96.69   */
   96.70 -bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
   96.71 +bool xs_rm(struct xs_handle *h, xs_transaction_t t,
   96.72  	   const char *path)
   96.73  {
   96.74  	return xs_bool(xs_single(h, t, XS_RM, path, NULL));
   96.75 @@ -465,7 +465,7 @@ bool xs_rm(struct xs_handle *h, struct x
   96.76   * Returns malloced array, or NULL: call free() after use.
   96.77   */
   96.78  struct xs_permissions *xs_get_permissions(struct xs_handle *h,
   96.79 -					  struct xs_transaction_handle *t,
   96.80 +					  xs_transaction_t t,
   96.81  					  const char *path, unsigned int *num)
   96.82  {
   96.83  	char *strings;
   96.84 @@ -499,7 +499,7 @@ struct xs_permissions *xs_get_permission
   96.85   * Returns false on failure.
   96.86   */
   96.87  bool xs_set_permissions(struct xs_handle *h,
   96.88 -			struct xs_transaction_handle *t,
   96.89 +			xs_transaction_t t,
   96.90  			const char *path,
   96.91  			struct xs_permissions *perms,
   96.92  			unsigned int num_perms)
   96.93 @@ -560,7 +560,7 @@ bool xs_watch(struct xs_handle *h, const
   96.94  	iov[1].iov_base = (void *)token;
   96.95  	iov[1].iov_len = strlen(token) + 1;
   96.96  
   96.97 -	return xs_bool(xs_talkv(h, NULL, XS_WATCH, iov,
   96.98 +	return xs_bool(xs_talkv(h, XBT_NULL, XS_WATCH, iov,
   96.99  				ARRAY_SIZE(iov), NULL));
  96.100  }
  96.101  
  96.102 @@ -627,28 +627,28 @@ bool xs_unwatch(struct xs_handle *h, con
  96.103  	iov[1].iov_base = (char *)token;
  96.104  	iov[1].iov_len = strlen(token) + 1;
  96.105  
  96.106 -	return xs_bool(xs_talkv(h, NULL, XS_UNWATCH, iov,
  96.107 +	return xs_bool(xs_talkv(h, XBT_NULL, XS_UNWATCH, iov,
  96.108  				ARRAY_SIZE(iov), NULL));
  96.109  }
  96.110  
  96.111  /* Start a transaction: changes by others will not be seen during this
  96.112   * transaction, and changes will not be visible to others until end.
  96.113   * You can only have one transaction at any time.
  96.114 - * Returns NULL on failure.
  96.115 + * Returns XBT_NULL on failure.
  96.116   */
  96.117 -struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h)
  96.118 +xs_transaction_t xs_transaction_start(struct xs_handle *h)
  96.119  {
  96.120  	char *id_str;
  96.121 -	unsigned long id;
  96.122 +	xs_transaction_t id;
  96.123  
  96.124 -	id_str = xs_single(h, NULL, XS_TRANSACTION_START, "", NULL);
  96.125 +	id_str = xs_single(h, XBT_NULL, XS_TRANSACTION_START, "", NULL);
  96.126  	if (id_str == NULL)
  96.127 -		return NULL;
  96.128 +		return XBT_NULL;
  96.129  
  96.130  	id = strtoul(id_str, NULL, 0);
  96.131  	free(id_str);
  96.132  
  96.133 -	return (struct xs_transaction_handle *)id;
  96.134 +	return id;
  96.135  }
  96.136  
  96.137  /* End a transaction.
  96.138 @@ -656,7 +656,7 @@ struct xs_transaction_handle *xs_transac
  96.139   * Returns false on failure, which indicates an error: transactions will
  96.140   * not fail spuriously.
  96.141   */
  96.142 -bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
  96.143 +bool xs_transaction_end(struct xs_handle *h, xs_transaction_t t,
  96.144  			bool abort)
  96.145  {
  96.146  	char abortstr[2];
  96.147 @@ -693,7 +693,7 @@ bool xs_introduce_domain(struct xs_handl
  96.148  	iov[2].iov_base = eventchn_str;
  96.149  	iov[2].iov_len = strlen(eventchn_str) + 1;
  96.150  
  96.151 -	return xs_bool(xs_talkv(h, NULL, XS_INTRODUCE, iov,
  96.152 +	return xs_bool(xs_talkv(h, XBT_NULL, XS_INTRODUCE, iov,
  96.153  				ARRAY_SIZE(iov), NULL));
  96.154  }
  96.155  
  96.156 @@ -705,7 +705,7 @@ static void * single_with_domid(struct x
  96.157  
  96.158  	sprintf(domid_str, "%u", domid);
  96.159  
  96.160 -	return xs_single(h, NULL, type, domid_str, NULL);
  96.161 +	return xs_single(h, XBT_NULL, type, domid_str, NULL);
  96.162  }
  96.163  
  96.164  bool xs_release_domain(struct xs_handle *h, unsigned int domid)
  96.165 @@ -719,7 +719,7 @@ char *xs_get_domain_path(struct xs_handl
  96.166  
  96.167  	sprintf(domid_str, "%u", domid);
  96.168  
  96.169 -	return xs_single(h, NULL, XS_GET_DOMAIN_PATH, domid_str, NULL);
  96.170 +	return xs_single(h, XBT_NULL, XS_GET_DOMAIN_PATH, domid_str, NULL);
  96.171  }
  96.172  
  96.173  bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
  96.174 @@ -739,7 +739,7 @@ char *xs_debug_command(struct xs_handle 
  96.175  	iov[1].iov_base = data;
  96.176  	iov[1].iov_len = len;
  96.177  
  96.178 -	return xs_talkv(h, NULL, XS_DEBUG, iov,
  96.179 +	return xs_talkv(h, XBT_NULL, XS_DEBUG, iov,
  96.180  			ARRAY_SIZE(iov), NULL);
  96.181  }
  96.182  
    97.1 --- a/tools/xenstore/xs.h	Fri Jan 13 10:38:44 2006 -0600
    97.2 +++ b/tools/xenstore/xs.h	Fri Jan 13 14:12:24 2006 -0600
    97.3 @@ -22,8 +22,10 @@
    97.4  
    97.5  #include <xs_lib.h>
    97.6  
    97.7 +#define XBT_NULL 0
    97.8 +
    97.9  struct xs_handle;
   97.10 -struct xs_transaction_handle;
   97.11 +typedef uint32_t xs_transaction_t;
   97.12  
   97.13  /* On failure, these routines set errno. */
   97.14  
   97.15 @@ -45,45 +47,45 @@ void xs_daemon_close(struct xs_handle *)
   97.16   * Returns a malloced array: call free() on it after use.
   97.17   * Num indicates size.
   97.18   */
   97.19 -char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
   97.20 +char **xs_directory(struct xs_handle *h, xs_transaction_t t,
   97.21  		    const char *path, unsigned int *num);
   97.22  
   97.23  /* Get the value of a single file, nul terminated.
   97.24   * Returns a malloced value: call free() on it after use.
   97.25   * len indicates length in bytes, not including terminator.
   97.26   */
   97.27 -void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
   97.28 +void *xs_read(struct xs_handle *h, xs_transaction_t t,
   97.29  	      const char *path, unsigned int *len);
   97.30  
   97.31  /* Write the value of a single file.
   97.32   * Returns false on failure.
   97.33   */
   97.34 -bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
   97.35 +bool xs_write(struct xs_handle *h, xs_transaction_t t,
   97.36  	      const char *path, const void *data, unsigned int len);
   97.37  
   97.38  /* Create a new directory.
   97.39   * Returns false on failure, or success if it already exists.
   97.40   */
   97.41 -bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
   97.42 +bool xs_mkdir(struct xs_handle *h, xs_transaction_t t,
   97.43  	      const char *path);
   97.44  
   97.45  /* Destroy a file or directory (and children).
   97.46 - * Returns false on failure, or success if it doesn't exist.
   97.47 + * Returns false on failure, or if it doesn't exist.
   97.48   */
   97.49 -bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
   97.50 +bool xs_rm(struct xs_handle *h, xs_transaction_t t,
   97.51  	   const char *path);
   97.52  
   97.53  /* Get permissions of node (first element is owner, first perms is "other").
   97.54   * Returns malloced array, or NULL: call free() after use.
   97.55   */
   97.56  struct xs_permissions *xs_get_permissions(struct xs_handle *h,
   97.57 -					  struct xs_transaction_handle *t,
   97.58 +					  xs_transaction_t t,
   97.59  					  const char *path, unsigned int *num);
   97.60  
   97.61  /* Set permissions of node (must be owner).
   97.62   * Returns false on failure.
   97.63   */
   97.64 -bool xs_set_permissions(struct xs_handle *h, struct xs_transaction_handle *t,
   97.65 +bool xs_set_permissions(struct xs_handle *h, xs_transaction_t t,
   97.66  			const char *path, struct xs_permissions *perms,
   97.67  			unsigned int num_perms);
   97.68  
   97.69 @@ -113,14 +115,14 @@ bool xs_unwatch(struct xs_handle *h, con
   97.70   * You can only have one transaction at any time.
   97.71   * Returns NULL on failure.
   97.72   */
   97.73 -struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h);
   97.74 +xs_transaction_t xs_transaction_start(struct xs_handle *h);
   97.75  
   97.76  /* End a transaction.
   97.77   * If abandon is true, transaction is discarded instead of committed.
   97.78   * Returns false on failure: if errno == EAGAIN, you have to restart
   97.79   * transaction.
   97.80   */
   97.81 -bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
   97.82 +bool xs_transaction_end(struct xs_handle *h, xs_transaction_t t,
   97.83  			bool abort);
   97.84  
   97.85  /* Introduce a new domain.
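
Taken together, the retyped interface is used as below.  A minimal client
(assumes libxenstore and a running xenstored; the store path is illustrative,
and the retry-on-EAGAIN loop mirrors xenstore_client.c above):

    #include <err.h>
    #include <errno.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <xs.h>

    int main(void)
    {
        struct xs_handle *xsh = xs_daemon_open();
        xs_transaction_t t;

        if (!xsh)
            err(1, "xs_daemon_open");

    again:
        t = xs_transaction_start(xsh);
        if (t == XBT_NULL)
            err(1, "xs_transaction_start");

        if (!xs_write(xsh, t, "/tool/example", "1", 1))
            err(1, "xs_write");

        if (!xs_transaction_end(xsh, t, false /* commit */)) {
            if (errno == EAGAIN)
                goto again;             /* collided with another writer */
            err(1, "xs_transaction_end");
        }

        xs_daemon_close(xsh);
        return 0;
    }
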
    98.1 --- a/tools/xenstore/xs_test.c	Fri Jan 13 10:38:44 2006 -0600
    98.2 +++ b/tools/xenstore/xs_test.c	Fri Jan 13 14:12:24 2006 -0600
    98.3 @@ -37,12 +37,13 @@
    98.4  #include <sys/time.h>
    98.5  #include "utils.h"
    98.6  #include "xs_lib.h"
    98.7 +#include "xs.h"
    98.8  #include "list.h"
    98.9  
   98.10  #define XSTEST
   98.11  
   98.12  static struct xs_handle *handles[10] = { NULL };
   98.13 -static struct xs_transaction_handle *txh[10] = { NULL };
   98.14 +static xs_transaction_t txh[10] = { XBT_NULL };
   98.15  
   98.16  static unsigned int timeout_ms = 500;
   98.17  static bool timeout_suppressed = true;
   98.18 @@ -492,7 +493,7 @@ static void do_unwatch(unsigned int hand
   98.19  static void do_start(unsigned int handle)
   98.20  {
   98.21  	txh[handle] = xs_transaction_start(handles[handle]);
   98.22 -	if (txh[handle] == NULL)
   98.23 +	if (txh[handle] == XBT_NULL)
   98.24  		failed(handle);
   98.25  }
   98.26  
   98.27 @@ -500,7 +501,7 @@ static void do_end(unsigned int handle, 
   98.28  {
   98.29  	if (!xs_transaction_end(handles[handle], txh[handle], abort))
   98.30  		failed(handle);
   98.31 -	txh[handle] = NULL;
   98.32 +	txh[handle] = XBT_NULL;
   98.33  }
   98.34  
   98.35  static void do_introduce(unsigned int handle,
   98.36 @@ -534,7 +535,7 @@ static void do_introduce(unsigned int ha
   98.37  	*(uint16_t *)((void *)interface + 36) = atoi(eventchn);
   98.38  
   98.39  	if (!xs_introduce_domain(handles[handle], atoi(domid),
   98.40 -				 atol(mfn), atoi(eventchn), path)) {
   98.41 +				 atol(mfn), atoi(eventchn))) {
   98.42  		failed(handle);
   98.43  		munmap(interface, getpagesize());
   98.44  		return;
   98.45 @@ -718,7 +719,7 @@ static void do_command(unsigned int defa
   98.46  	else if (streq(command, "close")) {
   98.47  		xs_daemon_close(handles[handle]);
   98.48  		handles[handle] = NULL;
   98.49 -		txh[handle] = NULL;
   98.50 +		txh[handle] = XBT_NULL;
   98.51  	} else if (streq(command, "start"))
   98.52  		do_start(handle);
   98.53  	else if (streq(command, "commit"))
    99.1 --- a/tools/xenstore/xsls.c	Fri Jan 13 10:38:44 2006 -0600
    99.2 +++ b/tools/xenstore/xsls.c	Fri Jan 13 14:12:24 2006 -0600
    99.3 @@ -11,7 +11,7 @@ void print_dir(struct xs_handle *h, char
    99.4      int i;
    99.5      unsigned int num, len;
    99.6  
    99.7 -    e = xs_directory(h, NULL, path, &num);
    99.8 +    e = xs_directory(h, XBT_NULL, path, &num);
    99.9      if (e == NULL)
   99.10          err(1, "xs_directory (%s)", path);
   99.11  
   99.12 @@ -22,7 +22,7 @@ void print_dir(struct xs_handle *h, char
   99.13          sprintf(newpath, "%s%s%s", path, 
   99.14                  path[strlen(path)-1] == '/' ? "" : "/", 
   99.15                  e[i]);
   99.16 -        val = xs_read(h, NULL, newpath, &len);
   99.17 +        val = xs_read(h, XBT_NULL, newpath, &len);
   99.18          if (val == NULL)
   99.19              printf(":\n");
   99.20          else if ((unsigned)len > (151 - strlen(e[i])))
   100.1 --- a/tools/xentrace/xenctx.c	Fri Jan 13 10:38:44 2006 -0600
   100.2 +++ b/tools/xentrace/xenctx.c	Fri Jan 13 14:12:24 2006 -0600
   100.3 @@ -380,10 +380,10 @@ void dump_ctx(int vcpu)
   100.4          exit(-1);
   100.5      }
   100.6  
   100.7 -    ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
   100.8 +    ret = xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx);
   100.9      if (ret < 0) {
  100.10          xc_domain_unpause(xc_handle, domid);
  100.11 -        perror("xc_domain_get_vcpu_context");
  100.12 +        perror("xc_vcpu_getcontext");
  100.13          exit(-1);
  100.14      }
  100.15  
   101.1 --- a/tools/xm-test/lib/XmTestLib/XenDomain.py	Fri Jan 13 10:38:44 2006 -0600
   101.2 +++ b/tools/xm-test/lib/XmTestLib/XenDomain.py	Fri Jan 13 14:12:24 2006 -0600
   101.3 @@ -72,7 +72,8 @@ ParavirtDefaults = {"memory"       : 64,
   101.4                      }
   101.5  VmxDefaults =      {"memory"       : 64,
   101.6                      "vcpus"        : 1,
   101.7 -                    "nics"         : 0,
   101.8 +                    "acpi"         : 0,
   101.9 +                    "apic"         : 0,
  101.10                      "disk"         : ["file:%s/disk.img,ioemu:%s,w" %
  101.11                                     (getRdPath(), BLOCK_ROOT_DEV)],
  101.12                      "kernel"       : "/usr/lib/xen/boot/vmxloader",
   102.1 --- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py	Fri Jan 13 10:38:44 2006 -0600
   102.2 +++ b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py	Fri Jan 13 14:12:24 2006 -0600
   102.3 @@ -46,6 +46,8 @@ if not checkBlockList(domain):
   102.4  if not checkXmLongList(domain):
   102.5      FAIL("xm long list does not show that hda1 was attached")
   102.6  
   102.7 +time.sleep(2)
   102.8 +
   102.9  s, o = traceCommand("xm block-detach %s hda1" % domain.getName())
  102.10  if s != 0:
  102.11      FAIL("block-detach failed")
   103.1 --- a/tools/xm-test/tests/network-attach/Makefile.am	Fri Jan 13 10:38:44 2006 -0600
   103.2 +++ b/tools/xm-test/tests/network-attach/Makefile.am	Fri Jan 13 14:12:24 2006 -0600
   103.3 @@ -6,7 +6,7 @@ TESTS = 01_network_attach_pos.test \
   103.4  	03_network_attach_detach_multiple_pos.test  \
   103.5  	04_network_attach_baddomain_neg.test
   103.6  
   103.7 -XFAIL_TESTS = 03_network_attach_detach_multiple_pos.test
   103.8 +XFAIL_TESTS = 
   103.9  
  103.10  EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) network_utils.py
  103.11  
   104.1 --- a/tools/xm-test/tests/network/02_network_local_ping_pos.py	Fri Jan 13 10:38:44 2006 -0600
   104.2 +++ b/tools/xm-test/tests/network/02_network_local_ping_pos.py	Fri Jan 13 14:12:24 2006 -0600
   104.3 @@ -50,7 +50,7 @@ try:
   104.4      console.sendInput("bhs")
   104.5  
   104.6      # Bring up the "lo" interface.
   104.7 -    console.runCmd("ifconfig lo up")
   104.8 +    console.runCmd("ifconfig lo 127.0.0.1")
   104.9  
  104.10      console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up")
  104.11  
   105.1 --- a/xen/arch/ia64/linux-xen/process-linux-xen.c	Fri Jan 13 10:38:44 2006 -0600
   105.2 +++ b/xen/arch/ia64/linux-xen/process-linux-xen.c	Fri Jan 13 14:12:24 2006 -0600
   105.3 @@ -241,7 +241,7 @@ static inline void play_dead(void)
   105.4  
   105.5  	max_xtp();
   105.6  	local_irq_disable();
   105.7 -	idle_task_exit();
   105.8 +	idle_domain_exit();
   105.9  	ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
  105.10  	/*
  105.11  	 * The above is a point of no-return, the processor is
   106.1 --- a/xen/arch/ia64/linux-xen/smpboot.c	Fri Jan 13 10:38:44 2006 -0600
   106.2 +++ b/xen/arch/ia64/linux-xen/smpboot.c	Fri Jan 13 14:12:24 2006 -0600
   106.3 @@ -482,10 +482,8 @@ do_rest:
   106.4  	struct vcpu *v;
   106.5  	void *stack;
   106.6  
   106.7 -	if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
   106.8 -		panic("failed 'createdomain' for CPU %d", cpu);
   106.9 -	set_bit(_DOMF_idle_domain, &idle->domain_flags);
  106.10 -	v = idle->vcpu[0];
  106.11 +	v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
  106.12 +	BUG_ON(v == NULL);
  106.13  
  106.14  	printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
  106.15  
   107.1 --- a/xen/arch/ia64/vmx/mmio.c	Fri Jan 13 10:38:44 2006 -0600
   107.2 +++ b/xen/arch/ia64/vmx/mmio.c	Fri Jan 13 14:12:24 2006 -0600
   107.3 @@ -29,7 +29,7 @@
   107.4  #include <asm/vmx_vcpu.h>
   107.5  #include <asm/privop.h>
   107.6  #include <asm/types.h>
   107.7 -#include <public/io/ioreq.h>
   107.8 +#include <public/hvm/ioreq.h>
   107.9  #include <asm/mm.h>
  107.10  #include <asm/vmx.h>
  107.11  
   108.1 --- a/xen/arch/ia64/vmx/vlsapic.c	Fri Jan 13 10:38:44 2006 -0600
   108.2 +++ b/xen/arch/ia64/vmx/vlsapic.c	Fri Jan 13 14:12:24 2006 -0600
   108.3 @@ -119,7 +119,7 @@ void vtm_init(VCPU *vcpu)
   108.4      itc_freq = local_cpu_data->itc_freq;
   108.5      vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
   108.6      vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
   108.7 -    init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
   108.8 +    init_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
   108.9      vtm_reset(vcpu);
  108.10  }
  108.11  
  108.12 @@ -163,20 +163,20 @@ void vtm_set_itv(VCPU *vcpu)
  108.13      local_irq_save(spsr);
  108.14      itv = VCPU(vcpu, itv);
  108.15      if ( ITV_IRQ_MASK(itv) )
  108.16 -        rem_ac_timer(&vtm->vtm_timer);
  108.17 +        stop_timer(&vtm->vtm_timer);
  108.18      vtm_interruption_update(vcpu, vtm);
  108.19      local_irq_restore(spsr);
  108.20  }
  108.21  
  108.22  
  108.23  /*
  108.24 - * Update interrupt or hook the vtm ac_timer for fire 
  108.25 + * Update interrupt or hook the vtm timer for fire 
  108.26   * At this point vtm_timer should be removed if itv is masked.
  108.27   */
  108.28  /* Interrupt must be disabled at this point */
  108.29  
  108.30  extern u64 cycle_to_ns(u64 cyle);
  108.31 -#define TIMER_SLOP (50*1000) /* ns */  /* copy from ac_timer.c */
  108.32 +#define TIMER_SLOP (50*1000) /* ns */  /* copy from timer.c */
  108.33  void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
  108.34  {
  108.35      uint64_t    cur_itc,vitm,vitv;
  108.36 @@ -198,7 +198,7 @@ void vtm_interruption_update(VCPU *vcpu,
  108.37      
  108.38      if ( diff_last >= 0 ) {
  108.39          // interrupt already fired.
  108.40 -        rem_ac_timer(&vtm->vtm_timer);
  108.41 +        stop_timer(&vtm->vtm_timer);
  108.42      }
  108.43      else if ( diff_now >= 0 ) {
  108.44          // ITV is fired.
  108.45 @@ -207,30 +207,30 @@ void vtm_interruption_update(VCPU *vcpu,
  108.46      /* Both last_itc & cur_itc < itm, wait for fire condition */
  108.47      else {
  108.48          expires = NOW() + cycle_to_ns(0-diff_now) + TIMER_SLOP;
  108.49 -        set_ac_timer(&vtm->vtm_timer, expires);
  108.50 +        set_timer(&vtm->vtm_timer, expires);
  108.51      }
  108.52      local_irq_restore(spsr);
  108.53  }
  108.54  
  108.55  /*
  108.56   * Action for vtm when the domain is scheduled out.
  108.57 - * Remove the ac_timer for vtm.
  108.58 + * Remove the timer for vtm.
  108.59   */
  108.60  void vtm_domain_out(VCPU *vcpu)
  108.61  {
  108.62 -    if(!is_idle_task(vcpu->domain))
  108.63 -	rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
  108.64 +    if(!is_idle_domain(vcpu->domain))
  108.65 +	stop_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
  108.66  }
  108.67  
  108.68  /*
  108.69   * Action for vtm when the domain is scheduled in.
  108.70 - * Fire vtm IRQ or add the ac_timer for vtm.
  108.71 + * Fire vtm IRQ or add the timer for vtm.
  108.72   */
  108.73  void vtm_domain_in(VCPU *vcpu)
  108.74  {
  108.75      vtime_t     *vtm;
  108.76  
  108.77 -    if(!is_idle_task(vcpu->domain)) {
  108.78 +    if(!is_idle_domain(vcpu->domain)) {
  108.79  	vtm=&(vcpu->arch.arch_vmx.vtm);
  108.80  	vtm_interruption_update(vcpu, vtm);
  108.81      }
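
The substitutions above are mechanical: init_ac_timer/set_ac_timer/rem_ac_timer become init_timer/set_timer/stop_timer with unchanged arguments. A minimal sketch of the resulting lifecycle, assuming the post-rename signatures mirror the old ac_timer ones exactly as these call sites suggest (names here are illustrative):

    /* Sketch only: bind once, arm with an absolute expiry in ns, disarm. */
    static struct timer example_timer;

    static void example_fn(void *data)
    {
        /* Handle expiry; a periodic timer simply re-arms itself. */
        set_timer(&example_timer, NOW() + MILLISECS(10));
    }

    void example_start(int cpu)
    {
        init_timer(&example_timer, example_fn, NULL, cpu);  /* bind to a CPU */
        set_timer(&example_timer, NOW() + MILLISECS(10));   /* arm */
    }

    void example_stop(void)
    {
        stop_timer(&example_timer);                         /* disarm */
    }
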
   109.1 --- a/xen/arch/ia64/vmx/vmx_init.c	Fri Jan 13 10:38:44 2006 -0600
   109.2 +++ b/xen/arch/ia64/vmx/vmx_init.c	Fri Jan 13 14:12:24 2006 -0600
   109.3 @@ -42,7 +42,7 @@
   109.4  #include <xen/lib.h>
   109.5  #include <asm/vmmu.h>
   109.6  #include <public/arch-ia64.h>
   109.7 -#include <public/io/ioreq.h>
   109.8 +#include <public/hvm/ioreq.h>
   109.9  #include <asm/vmx_phy_mode.h>
  109.10  #include <asm/processor.h>
  109.11  #include <asm/vmx.h>
   110.1 --- a/xen/arch/ia64/vmx/vmx_process.c	Fri Jan 13 10:38:44 2006 -0600
   110.2 +++ b/xen/arch/ia64/vmx/vmx_process.c	Fri Jan 13 14:12:24 2006 -0600
   110.3 @@ -231,7 +231,7 @@ void leave_hypervisor_tail(struct pt_reg
   110.4  	struct domain *d = current->domain;
   110.5  	struct vcpu *v = current;
   110.6  	// FIXME: Will this work properly if doing an RFI???
   110.7 -	if (!is_idle_task(d) ) {	// always comes from guest
   110.8 +	if (!is_idle_domain(d) ) {	// always comes from guest
   110.9  	        extern void vmx_dorfirfi(void);
  110.10  		struct pt_regs *user_regs = vcpu_regs(current);
  110.11   		if (local_softirq_pending())
   111.1 --- a/xen/arch/ia64/vmx/vmx_support.c	Fri Jan 13 10:38:44 2006 -0600
   111.2 +++ b/xen/arch/ia64/vmx/vmx_support.c	Fri Jan 13 14:12:24 2006 -0600
   111.3 @@ -21,7 +21,7 @@
   111.4   */
   111.5  #include <xen/config.h>
   111.6  #include <xen/sched.h>
   111.7 -#include <public/io/ioreq.h>
   111.8 +#include <public/hvm/ioreq.h>
   111.9  #include <asm/vmx.h>
  111.10  #include <asm/vmx_vcpu.h>
  111.11  
   112.1 --- a/xen/arch/ia64/xen/domain.c	Fri Jan 13 10:38:44 2006 -0600
   112.2 +++ b/xen/arch/ia64/xen/domain.c	Fri Jan 13 14:12:24 2006 -0600
   112.3 @@ -46,7 +46,7 @@
   112.4  #include <asm/vmx_vcpu.h>
   112.5  #include <asm/vmx_vpd.h>
   112.6  #include <asm/pal.h>
   112.7 -#include <public/io/ioreq.h>
   112.8 +#include <public/hvm/ioreq.h>
   112.9  
  112.10  #define CONFIG_DOMAIN0_CONTIGUOUS
  112.11  unsigned long dom0_start = -1L;
  112.12 @@ -87,7 +87,6 @@ static void continue_cpu_idle_loop(void)
  112.13  	int cpu = smp_processor_id();
  112.14  	for ( ; ; )
  112.15  	{
  112.16 -	printf ("idle%dD\n", cpu);
  112.17  #ifdef IA64
  112.18  //        __IRQ_STAT(cpu, idle_timestamp) = jiffies
  112.19  #else
  112.20 @@ -146,15 +145,26 @@ struct vcpu *alloc_vcpu_struct(struct do
  112.21  {
  112.22  	struct vcpu *v;
  112.23  
    112.24 +	/* Keep idle vcpu0 statically allocated at compile time, since
    112.25 +	 * some code inherited from Linux still requires it early in boot.
    112.26 +	 */
  112.27 +	if (is_idle_domain(d) && !vcpu_id)
  112.28 +		return idle_vcpu[0];
  112.29 +
  112.30  	if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
  112.31  		return NULL;
  112.32  
  112.33  	memset(v, 0, sizeof(*v)); 
  112.34 -        memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
  112.35 -	v->arch.privregs = 
  112.36 +        memcpy(&v->arch, &idle_vcpu[0]->arch, sizeof(v->arch));
  112.37 +
  112.38 +	if (!is_idle_domain(d)) {
  112.39 +	    v->arch.privregs = 
  112.40  		alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
  112.41 +	    BUG_ON(v->arch.privregs == NULL);
  112.42 +	    memset(v->arch.privregs, 0, PAGE_SIZE);
  112.43 +	}
  112.44 +
  112.45  	printf("arch_vcpu_info=%p\n", v->arch.privregs);
  112.46 -	memset(v->arch.privregs, 0, PAGE_SIZE);
  112.47  
  112.48  	return v;
  112.49  }
  112.50 @@ -191,6 +201,14 @@ int arch_do_createdomain(struct vcpu *v)
  112.51  	memset(ti, 0, sizeof(struct thread_info));
  112.52  	init_switch_stack(v);
  112.53  
  112.54 +	// the following will eventually need to be negotiated dynamically
  112.55 +	d->xen_vastart = XEN_START_ADDR;
  112.56 +	d->xen_vaend = XEN_END_ADDR;
  112.57 +	d->shared_info_va = SHAREDINFO_ADDR;
  112.58 +
  112.59 +	if (is_idle_vcpu(v))
  112.60 +	    return 0;
  112.61 +
  112.62  	d->shared_info = (void *)alloc_xenheap_page();
  112.63  	if (!d->shared_info) {
  112.64     		printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
  112.65 @@ -200,12 +218,7 @@ int arch_do_createdomain(struct vcpu *v)
  112.66  	if (v == d->vcpu[0])
  112.67  	    memset(&d->shared_info->evtchn_mask[0], 0xff,
  112.68  		sizeof(d->shared_info->evtchn_mask));
  112.69 -#if 0
  112.70 -	d->vcpu[0].arch.privregs = 
  112.71 -			alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
  112.72 -	printf("arch_vcpu_info=%p\n", d->vcpu[0].arch.privregs);
  112.73 -	memset(d->vcpu.arch.privregs, 0, PAGE_SIZE);
  112.74 -#endif
  112.75 +
  112.76  	v->vcpu_info = &(d->shared_info->vcpu_info[0]);
  112.77  
  112.78  	d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
  112.79 @@ -227,28 +240,21 @@ int arch_do_createdomain(struct vcpu *v)
  112.80  		BUG();
  112.81  	v->arch.starting_rid = d->arch.starting_rid;
  112.82  	v->arch.ending_rid = d->arch.ending_rid;
  112.83 -	// the following will eventually need to be negotiated dynamically
  112.84 -	d->xen_vastart = XEN_START_ADDR;
  112.85 -	d->xen_vaend = XEN_END_ADDR;
  112.86 -	d->shared_info_va = SHAREDINFO_ADDR;
  112.87  	d->arch.breakimm = 0x1000;
  112.88  	v->arch.breakimm = d->arch.breakimm;
  112.89  
  112.90  	d->arch.sys_pgnr = 0;
  112.91 -	if (d->domain_id != IDLE_DOMAIN_ID) {
  112.92 -		d->arch.mm = xmalloc(struct mm_struct);
  112.93 -		if (unlikely(!d->arch.mm)) {
  112.94 -			printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
  112.95 -			return -ENOMEM;
  112.96 -		}
  112.97 -		memset(d->arch.mm, 0, sizeof(*d->arch.mm));
  112.98 -		d->arch.mm->pgd = pgd_alloc(d->arch.mm);
  112.99 -		if (unlikely(!d->arch.mm->pgd)) {
 112.100 -			printk("Can't allocate pgd for domain %d\n",d->domain_id);
 112.101 -			return -ENOMEM;
 112.102 -		}
 112.103 -	} else
 112.104 - 		d->arch.mm = NULL;
 112.105 +	d->arch.mm = xmalloc(struct mm_struct);
 112.106 +	if (unlikely(!d->arch.mm)) {
 112.107 +		printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
 112.108 +		return -ENOMEM;
 112.109 +	}
 112.110 +	memset(d->arch.mm, 0, sizeof(*d->arch.mm));
 112.111 +	d->arch.mm->pgd = pgd_alloc(d->arch.mm);
 112.112 +	if (unlikely(!d->arch.mm->pgd)) {
 112.113 +		printk("Can't allocate pgd for domain %d\n",d->domain_id);
 112.114 +		return -ENOMEM;
 112.115 +	}
 112.116  	printf ("arch_do_create_domain: domain=%p\n", d);
 112.117  
 112.118  	return 0;
 112.119 @@ -1070,15 +1076,6 @@ void domain_pend_keyboard_interrupt(int 
 112.120  	vcpu_pend_interrupt(dom0->vcpu[0],irq);
 112.121  }
 112.122  
 112.123 -void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
 112.124 -{
 112.125 -	if ( v->processor == newcpu )
 112.126 -		return;
 112.127 -
 112.128 -	set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
 112.129 -	v->processor = newcpu;
 112.130 -}
 112.131 -
 112.132  void sync_vcpu_execstate(struct vcpu *v)
 112.133  {
 112.134  	ia64_save_fpu(v->arch._thread.fph);
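
Throughout this file the old is_idle_task(d)/IDLE_DOMAIN_ID tests give way to the common is_idle_domain()/is_idle_vcpu() predicates, and idle-specific setup is short-circuited early rather than special-cased at each allocation. One plausible shape for the predicates, for orientation only; the real definitions live in the common headers, so treat this as an assumption:

    /* Assumed definitions, not the authoritative ones. */
    static inline int is_idle_domain(const struct domain *d)
    {
        return d->domain_id == IDLE_DOMAIN_ID;
    }

    static inline int is_idle_vcpu(const struct vcpu *v)
    {
        return is_idle_domain(v->domain);
    }
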
   113.1 --- a/xen/arch/ia64/xen/idle0_task.c	Fri Jan 13 10:38:44 2006 -0600
   113.2 +++ b/xen/arch/ia64/xen/idle0_task.c	Fri Jan 13 14:12:24 2006 -0600
   113.3 @@ -11,31 +11,15 @@
   113.4  	.mmlist		= LIST_HEAD_INIT(name.mmlist),		\
   113.5  }
   113.6  
   113.7 -#define IDLE0_EXEC_DOMAIN(_ed,_d)    \
   113.8 +#define IDLE_VCPU(_v)    	     \
   113.9  {                                    \
  113.10      processor:   0,                  \
  113.11 -    mm:          0,                  \
  113.12 -    thread:      INIT_THREAD,        \
  113.13 -    domain:      (_d)                \
  113.14 -}
  113.15 -
  113.16 -#define IDLE0_DOMAIN(_t)             \
  113.17 -{                                    \
  113.18 -    domain_id:   IDLE_DOMAIN_ID,     \
  113.19 -    domain_flags:DOMF_idle_domain,   \
  113.20 -    refcnt:      ATOMIC_INIT(1)      \
  113.21 +    domain:      0                   \
  113.22  }
  113.23  
  113.24  struct mm_struct init_mm = INIT_MM(init_mm);
  113.25  EXPORT_SYMBOL(init_mm);
  113.26  
  113.27 -struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);
  113.28 -#if 0
  113.29 -struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
  113.30 -                                                         &idle0_domain);
  113.31 -#endif
  113.32 -
  113.33 -
  113.34  /*
  113.35   * Initial task structure.
  113.36   *
  113.37 @@ -44,15 +28,12 @@ struct vcpu idle0_vcpu = IDLE0_EXEC_DOMA
  113.38   */
  113.39  union {
  113.40  	struct {
  113.41 -		struct domain task;
  113.42 +		struct vcpu task;
  113.43  	} s;
  113.44  	unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
  113.45 -} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));
  113.46 -// = {{
  113.47 -	;
  113.48 -//.task =		IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
  113.49 -//};
  113.50 -//};
  113.51 +} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
  113.52 +	.task = IDLE_VCPU(init_task_mem.s.task)
  113.53 +}};
  113.54  
  113.55  EXPORT_SYMBOL(init_task);
  113.56  
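
The union that replaces the commented-out initializer overlays the boot vcpu on the base of a statically allocated stack, so the current-vcpu pointer can be derived from the stack pointer before any allocator is running. A stand-alone model of the idiom (names are illustrative, not Xen's):

    #include <stdio.h>

    #define STACK_SIZE (32 * 1024)

    struct boot_vcpu { int processor; void *domain; };

    /* The vcpu structure and its stack share one static allocation. */
    static union {
        struct { struct boot_vcpu task; } s;
        unsigned long stack[STACK_SIZE / sizeof(unsigned long)];
    } boot_task_mem = {{ .task = { .processor = 0, .domain = 0 } }};

    int main(void)
    {
        printf("task at %p, stack base at %p\n",
               (void *)&boot_task_mem.s.task, (void *)boot_task_mem.stack);
        return 0;
    }
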
   114.1 --- a/xen/arch/ia64/xen/process.c	Fri Jan 13 10:38:44 2006 -0600
   114.2 +++ b/xen/arch/ia64/xen/process.c	Fri Jan 13 14:12:24 2006 -0600
   114.3 @@ -65,26 +65,16 @@ long do_iopl(domid_t domain, unsigned in
   114.4  
   114.5  extern struct schedule_data schedule_data[NR_CPUS];
   114.6  
   114.7 -void schedule_tail(struct vcpu *next)
   114.8 +void schedule_tail(struct vcpu *prev)
   114.9  {
  114.10 -	unsigned long rr7;
  114.11 -	//printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
  114.12 -	//printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
  114.13 +	context_saved(prev);
  114.14  
  114.15 -    // TG: Real HACK FIXME.
  114.16 -    // This is currently necessary because when a new domain is started, 
  114.17 -    // the context_switch function of xen/common/schedule.c(__enter_scheduler)
  114.18 -    // never returns.  Therefore, the lock must be released.
  114.19 -    // schedule_tail is only called when a domain is started.
  114.20 -    spin_unlock_irq(&schedule_data[current->processor].schedule_lock);
  114.21 -
  114.22 -	/* rr7 will be postponed to last point when resuming back to guest */
  114.23 -    if(VMX_DOMAIN(current)){
  114.24 -    	vmx_load_all_rr(current);
  114.25 -    }else{
  114.26 -	    load_region_regs(current);
  114.27 -            vcpu_load_kernel_regs(current);
  114.28 -    }
  114.29 +	if (VMX_DOMAIN(current)) {
  114.30 +		vmx_load_all_rr(current);
  114.31 +	} else {
  114.32 +		load_region_regs(current);
  114.33 +		vcpu_load_kernel_regs(current);
  114.34 +	}
  114.35  }
  114.36  
  114.37  void tdpfoo(void) { }
  114.38 @@ -252,7 +242,7 @@ void deliver_pending_interrupt(struct pt
  114.39  	struct domain *d = current->domain;
  114.40  	struct vcpu *v = current;
  114.41  	// FIXME: Will this work properly if doing an RFI???
  114.42 -	if (!is_idle_task(d) && user_mode(regs)) {
  114.43 +	if (!is_idle_domain(d) && user_mode(regs)) {
  114.44  		//vcpu_poke_timer(v);
  114.45  		if (vcpu_deliverable_interrupts(v))
  114.46  			reflect_extint(regs);
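
schedule_tail no longer drops the scheduler lock by hand; the hand-off is signalled through context_saved(prev), which publishes that the previous vcpu's state is off the CPU so another processor may pick it up. A guess at the hook's shape only; the real definition lives in common scheduler code, and the flag name is an assumption:

    /* Sketch: make prev's saved state visible, then clear "running". */
    static inline void context_saved_sketch(struct vcpu *prev)
    {
        smp_wmb();
        clear_bit(_VCPUF_running, &prev->vcpu_flags);
    }
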
   115.1 --- a/xen/arch/ia64/xen/vcpu.c	Fri Jan 13 10:38:44 2006 -0600
   115.2 +++ b/xen/arch/ia64/xen/vcpu.c	Fri Jan 13 14:12:24 2006 -0600
   115.3 @@ -1085,7 +1085,7 @@ void vcpu_set_next_timer(VCPU *vcpu)
   115.4  	/* gloss over the wraparound problem for now... we know it exists
   115.5  	 * but it doesn't matter right now */
   115.6  
   115.7 -	if (is_idle_task(vcpu->domain)) {
   115.8 +	if (is_idle_domain(vcpu->domain)) {
   115.9  //		printf("****** vcpu_set_next_timer called during idle!!\n");
  115.10  		vcpu_safe_set_itm(s);
  115.11  		return;
   116.1 --- a/xen/arch/ia64/xen/xenmisc.c	Fri Jan 13 10:38:44 2006 -0600
   116.2 +++ b/xen/arch/ia64/xen/xenmisc.c	Fri Jan 13 14:12:24 2006 -0600
   116.3 @@ -75,7 +75,7 @@ struct pt_regs *guest_cpu_user_regs(void
   116.4  
   116.5  void raise_actimer_softirq(void)
   116.6  {
   116.7 -	raise_softirq(AC_TIMER_SOFTIRQ);
   116.8 +	raise_softirq(TIMER_SOFTIRQ);
   116.9  }
  116.10  
  116.11  unsigned long
  116.12 @@ -320,18 +320,15 @@ if (!i--) { printk("+",id); i = 1000000;
  116.13  	ia64_set_iva(&ia64_ivt);
  116.14  	ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
  116.15  		VHPT_ENABLED);
  116.16 -    	if (!is_idle_task(current->domain)) {
  116.17 +    	if (!is_idle_domain(current->domain)) {
  116.18  	    	load_region_regs(current);
  116.19  	    	vcpu_load_kernel_regs(current);
  116.20  		    if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
  116.21      	}
  116.22  	    if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
  116.23      }
  116.24 -}
  116.25  
  116.26 -void context_switch_finalise(struct vcpu *next)
  116.27 -{
  116.28 -	/* nothing to do */
  116.29 +    context_saved(prev);
  116.30  }
  116.31  
  116.32  void continue_running(struct vcpu *same)
   117.1 --- a/xen/arch/ia64/xen/xensetup.c	Fri Jan 13 10:38:44 2006 -0600
   117.2 +++ b/xen/arch/ia64/xen/xensetup.c	Fri Jan 13 14:12:24 2006 -0600
   117.3 @@ -27,7 +27,7 @@ unsigned long xenheap_phys_end;
   117.4  
   117.5  char saved_command_line[COMMAND_LINE_SIZE];
   117.6  
   117.7 -struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
   117.8 +struct vcpu *idle_vcpu[NR_CPUS];
   117.9  
  117.10  cpumask_t cpu_present_map;
  117.11  
  117.12 @@ -157,16 +157,12 @@ void start_kernel(void)
  117.13      unsigned long dom0_memory_start, dom0_memory_size;
  117.14      unsigned long dom0_initrd_start, dom0_initrd_size;
  117.15      unsigned long initial_images_start, initial_images_end;
  117.16 +    struct domain *idle_domain;
  117.17  
  117.18      running_on_sim = is_platform_hp_ski();
  117.19      /* Kernel may be relocated by EFI loader */
  117.20      xen_pstart = ia64_tpa(KERNEL_START);
  117.21  
  117.22 -    /* Must do this early -- e.g., spinlocks rely on get_current(). */
  117.23 -    //set_current(&idle0_vcpu);
  117.24 -    ia64_r13 = (void *)&idle0_vcpu;
  117.25 -    idle0_vcpu.domain = &idle0_domain;
  117.26 -
  117.27      early_setup_arch(&cmdline);
  117.28  
  117.29      /* We initialise the serial devices very early so we can get debugging. */
  117.30 @@ -282,18 +278,22 @@ void start_kernel(void)
  117.31  	(xenheap_phys_end-__pa(heap_start)) >> 20,
  117.32  	(xenheap_phys_end-__pa(heap_start)) >> 10);
  117.33  
  117.34 +printk("About to call scheduler_init()\n");
  117.35 +    scheduler_init();
  117.36 +    idle_vcpu[0] = (struct vcpu*) ia64_r13;
  117.37 +    idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
  117.38 +    BUG_ON(idle_domain == NULL);
  117.39 +
  117.40      late_setup_arch(&cmdline);
  117.41      setup_per_cpu_areas();
  117.42      mem_init();
  117.43  
  117.44 -printk("About to call scheduler_init()\n");
  117.45 -    scheduler_init();
  117.46      local_irq_disable();
  117.47      init_IRQ ();
  117.48  printk("About to call init_xen_time()\n");
  117.49      init_xen_time(); /* initialise the time */
  117.50 -printk("About to call ac_timer_init()\n");
  117.51 -    ac_timer_init();
  117.52 +printk("About to call timer_init()\n");
  117.53 +    timer_init();
  117.54  
  117.55  #ifdef CONFIG_XEN_CONSOLE_INPUT	/* CONFIG_SERIAL_8250_CONSOLE=n in dom0! */
  117.56      initialize_keytable();
  117.57 @@ -309,14 +309,10 @@ printk("About to call ac_timer_init()\n"
  117.58      }
  117.59  
  117.60      smp_prepare_cpus(max_cpus);
  117.61 -
  117.62      /* We aren't hotplug-capable yet. */
  117.63 -    //BUG_ON(!cpus_empty(cpu_present_map));
  117.64      for_each_cpu ( i )
  117.65          cpu_set(i, cpu_present_map);
  117.66  
  117.67 -    //BUG_ON(!local_irq_is_enabled());
  117.68 -
  117.69      /*  Enable IRQ to receive IPI (needed for ITC sync).  */
  117.70      local_irq_enable();
  117.71  
  117.72 @@ -345,12 +341,7 @@ printk("About to call sort_main_extable(
  117.73      /* Create initial domain 0. */
  117.74  printk("About to call do_createdomain()\n");
  117.75      dom0 = do_createdomain(0, 0);
  117.76 -    init_task.domain = &idle0_domain;
  117.77 -    init_task.processor = 0;
  117.78 -//    init_task.mm = &init_mm;
  117.79 -    init_task.domain->arch.mm = &init_mm;
  117.80 -//    init_task.thread = INIT_THREAD;
  117.81 -    //arch_do_createdomain(current);
  117.82 +
  117.83  #ifdef CLONE_DOMAIN0
  117.84      {
  117.85      int i;
  117.86 @@ -383,8 +374,7 @@ printk("About to call do_createdomain()\
  117.87          panic("Could not set up DOM0 guest OS\n");
  117.88  
  117.89      /* PIN domain0 on CPU 0.  */
  117.90 -    dom0->vcpu[0]->cpumap=1;
  117.91 -    set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags);
  117.92 +    dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
  117.93  
  117.94  #ifdef CLONE_DOMAIN0
  117.95      {
  117.96 @@ -433,8 +423,8 @@ printk("About to call init_trace_bufs()\
  117.97  
  117.98      local_irq_enable();
  117.99  
 117.100 -    printf("About to call schedulers_start dom0=%p, idle0_dom=%p\n",
 117.101 -	   dom0, &idle0_domain);
 117.102 +    printf("About to call schedulers_start dom0=%p, idle_dom=%p\n",
 117.103 +	   dom0, &idle_domain);
 117.104      schedulers_start();
 117.105  
 117.106      domain_unpause_by_systemcontroller(dom0);
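
Boot ordering changes here: scheduler_init() now runs before the idle domain is created through the ordinary do_createdomain(IDLE_DOMAIN_ID, 0) path, idle_vcpu[0] is recovered from ia64_r13 (which already points at the static init_task area), and dom0 pinning collapses from a cpumap plus a pinned flag into a single affinity mask. The pinning, isolated as a sketch:

    /* Pinning under the new interface: allowed CPUs are just a mask. */
    void pin_to_cpu0(struct vcpu *v)
    {
        v->cpu_affinity = cpumask_of_cpu(0);   /* replaces cpumap + flag */
    }
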
   118.1 --- a/xen/arch/ia64/xen/xentime.c	Fri Jan 13 10:38:44 2006 -0600
   118.2 +++ b/xen/arch/ia64/xen/xentime.c	Fri Jan 13 14:12:24 2006 -0600
   118.3 @@ -127,7 +127,7 @@ xen_timer_interrupt (int irq, void *dev_
   118.4  			vcpu_wake(dom0->vcpu[0]);
   118.5  		}
   118.6  	}
   118.7 -	if (!is_idle_task(current->domain))  {
   118.8 +	if (!is_idle_domain(current->domain))  {
   118.9  		if (vcpu_timer_expired(current)) {
  118.10  			vcpu_pend_timer(current);
  118.11  			// ensure another timer interrupt happens even if domain doesn't
  118.12 @@ -196,7 +196,7 @@ xen_timer_interrupt (int irq, void *dev_
  118.13  //#endif
  118.14  		/* double check, in case we got hit by a (slow) PMI: */
  118.15  	} while (time_after_eq(ia64_get_itc(), new_itm));
  118.16 -	raise_softirq(AC_TIMER_SOFTIRQ);
  118.17 +	raise_softirq(TIMER_SOFTIRQ);
  118.18  
  118.19  	return IRQ_HANDLED;
  118.20  }
  118.21 @@ -235,7 +235,7 @@ int __init init_xen_time()
  118.22      return 0;
  118.23  }
  118.24  
  118.25 -int reprogram_ac_timer(s_time_t timeout)
  118.26 +int reprogram_timer(s_time_t timeout)
  118.27  {
  118.28  	struct vcpu *v = current;
  118.29  	s_time_t expire;
   119.1 --- a/xen/arch/x86/apic.c	Fri Jan 13 10:38:44 2006 -0600
   119.2 +++ b/xen/arch/x86/apic.c	Fri Jan 13 14:12:24 2006 -0600
   119.3 @@ -870,7 +870,7 @@ void enable_APIC_timer(void)
   119.4   * returns 1 on success
   119.5   * returns 0 if the timeout value is too small or in the past.
   119.6   */
   119.7 -int reprogram_ac_timer(s_time_t timeout)
   119.8 +int reprogram_timer(s_time_t timeout)
   119.9  {
  119.10      s_time_t    now;
  119.11      s_time_t    expire;
  119.12 @@ -931,7 +931,7 @@ void smp_apic_timer_interrupt(struct cpu
  119.13  {
  119.14      ack_APIC_irq();
  119.15      perfc_incrc(apic_timer);
  119.16 -    raise_softirq(AC_TIMER_SOFTIRQ);
  119.17 +    raise_softirq(TIMER_SOFTIRQ);
  119.18  }
  119.19  
  119.20  /*
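
The APIC timer interrupt stays minimal: acknowledge, count, raise TIMER_SOFTIRQ; expiry processing and re-arming via reprogram_timer() happen later in softirq context. A stand-alone model of that split, with illustrative names:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int timer_softirq_pending;

    /* Interrupt context: only mark work pending. */
    static void fake_timer_interrupt(void)
    {
        atomic_store(&timer_softirq_pending, 1);
    }

    /* Softirq context: drain expired timers, arm the next deadline. */
    static void run_timer_softirq(void)
    {
        if (atomic_exchange(&timer_softirq_pending, 0))
            printf("run expired timers, reprogram next deadline\n");
    }

    int main(void)
    {
        fake_timer_interrupt();
        run_timer_softirq();
        return 0;
    }
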
   120.1 --- a/xen/arch/x86/boot/x86_32.S	Fri Jan 13 10:38:44 2006 -0600
   120.2 +++ b/xen/arch/x86/boot/x86_32.S	Fri Jan 13 14:12:24 2006 -0600
   120.3 @@ -100,7 +100,7 @@ 1:      mov     %eax,__PAGE_OFFSET>>18(%
   120.4  1:      stosl   /* low mappings cover as much physmem as possible */
   120.5          add     $4,%edi
   120.6          add     $(1<<L2_PAGETABLE_SHIFT),%eax
   120.7 -        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
   120.8 +        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
   120.9          jne     1b
  120.10  #else
  120.11          /* Initialize low and high mappings of all memory with 4MB pages */
  120.12 @@ -113,7 +113,7 @@ 1:      mov     %eax,__PAGE_OFFSET>>20(%
  120.13          jne     1b
  120.14  1:      stosl   /* low mappings cover as much physmem as possible */
  120.15          add     $(1<<L2_PAGETABLE_SHIFT),%eax
  120.16 -        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
  120.17 +        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
  120.18          jne     1b
  120.19  #endif
  120.20          
   121.1 --- a/xen/arch/x86/dm/i8259.c	Fri Jan 13 10:38:44 2006 -0600
   121.2 +++ b/xen/arch/x86/dm/i8259.c	Fri Jan 13 14:12:24 2006 -0600
   121.3 @@ -29,7 +29,7 @@
   121.4  #include <xen/lib.h>
   121.5  #include <xen/errno.h>
   121.6  #include <xen/sched.h>
   121.7 -#include <public/io/ioreq.h>
   121.8 +#include <public/hvm/ioreq.h>
   121.9  #include <asm/vmx.h>
  121.10  #include <asm/vmx_vpic.h>
  121.11  #include <asm/current.h>
   122.1 --- a/xen/arch/x86/dm/vmx_vioapic.c	Fri Jan 13 10:38:44 2006 -0600
   122.2 +++ b/xen/arch/x86/dm/vmx_vioapic.c	Fri Jan 13 14:12:24 2006 -0600
   122.3 @@ -37,7 +37,7 @@
   122.4  #include <xen/lib.h>
   122.5  #include <xen/errno.h>
   122.6  #include <xen/sched.h>
   122.7 -#include <public/io/ioreq.h>
   122.8 +#include <public/hvm/ioreq.h>
   122.9  #include <asm/vmx.h>
  122.10  #include <asm/vmx_vpic.h>
  122.11  #include <asm/current.h>
   123.1 --- a/xen/arch/x86/dom0_ops.c	Fri Jan 13 10:38:44 2006 -0600
   123.2 +++ b/xen/arch/x86/dom0_ops.c	Fri Jan 13 14:12:24 2006 -0600
   123.3 @@ -36,13 +36,13 @@ static unsigned long msr_hi;
   123.4  
   123.5  static void write_msr_for(void *unused)
   123.6  {
   123.7 -    if ( ((1 << current->processor) & msr_cpu_mask) )
   123.8 +    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
   123.9          (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
  123.10  }
  123.11  
  123.12  static void read_msr_for(void *unused)
  123.13  {
  123.14 -    if ( ((1 << current->processor) & msr_cpu_mask) )
  123.15 +    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
  123.16          (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
  123.17  }
  123.18  
  123.19 @@ -103,12 +103,27 @@ long arch_do_dom0_op(dom0_op_t *op, dom0
  123.20              op->u.add_memtype.nr_pfns,
  123.21              op->u.add_memtype.type,
  123.22              1);
  123.23 +        if (ret > 0)
  123.24 +        {
  123.25 +            (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
  123.26 +            (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
  123.27 +            ret = 0;
  123.28 +        }
  123.29      }
  123.30      break;
  123.31  
  123.32      case DOM0_DEL_MEMTYPE:
  123.33      {
  123.34 -        ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
  123.35 +        if (op->u.del_memtype.handle == 0
  123.36 +            /* mtrr/main.c otherwise does a lookup */
  123.37 +            && (int)op->u.del_memtype.reg >= 0)
  123.38 +        {
  123.39 +            ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
  123.40 +            if (ret > 0)
  123.41 +                ret = 0;
  123.42 +        }
  123.43 +        else
  123.44 +            ret = -EINVAL;
  123.45      }
  123.46      break;
  123.47  
  123.48 @@ -179,7 +194,7 @@ long arch_do_dom0_op(dom0_op_t *op, dom0
  123.49          memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
  123.50          ret = 0;
  123.51          if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
  123.52 -	    ret = -EFAULT;
  123.53 +            ret = -EFAULT;
  123.54      }
  123.55      break;
  123.56      
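
DOM0_ADD_MEMTYPE now returns the allocated MTRR register index (plus a handle, currently always 0) to the caller, and DOM0_DEL_MEMTYPE only honours a request presenting that pair rather than freeing whatever register number it is given. A hypothetical caller's view of the contract; do_dom0_op(), the base-frame field name, and the WC constant are assumptions, while handle/reg/nr_pfns/type follow the structures used above:

    /* Sketch: add a write-combining range, then release exactly
     * what the hypervisor handed back. */
    int set_then_clear_wc(unsigned long pfn, unsigned long nr_pfns)
    {
        dom0_op_t add = { .cmd = DOM0_ADD_MEMTYPE };
        dom0_op_t del = { .cmd = DOM0_DEL_MEMTYPE };

        add.u.add_memtype.pfn     = pfn;            /* field name assumed */
        add.u.add_memtype.nr_pfns = nr_pfns;
        add.u.add_memtype.type    = MTRR_TYPE_WRCOMB;
        if (do_dom0_op(&add) != 0)                  /* assumed wrapper */
            return -1;

        del.u.del_memtype.handle = add.u.add_memtype.handle;  /* 0 today */
        del.u.del_memtype.reg    = add.u.add_memtype.reg;
        return do_dom0_op(&del);
    }
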
   124.1 --- a/xen/arch/x86/domain.c	Fri Jan 13 10:38:44 2006 -0600
   124.2 +++ b/xen/arch/x86/domain.c	Fri Jan 13 14:12:24 2006 -0600
   124.3 @@ -46,17 +46,16 @@ boolean_param("noreboot", opt_noreboot);
   124.4  
   124.5  struct percpu_ctxt {
   124.6      struct vcpu *curr_vcpu;
   124.7 -    unsigned int context_not_finalised;
   124.8      unsigned int dirty_segment_mask;
   124.9  } __cacheline_aligned;
  124.10  static struct percpu_ctxt percpu_ctxt[NR_CPUS];
  124.11  
  124.12 -static void continue_idle_task(struct vcpu *v)
  124.13 +static void continue_idle_domain(struct vcpu *v)
  124.14  {
  124.15      reset_stack_and_jump(idle_loop);
  124.16  }
  124.17  
  124.18 -static void continue_nonidle_task(struct vcpu *v)
  124.19 +static void continue_nonidle_domain(struct vcpu *v)
  124.20  {
  124.21      reset_stack_and_jump(ret_from_intr);
  124.22  }
  124.23 @@ -92,10 +91,9 @@ void startup_cpu_idle_loop(void)
  124.24  {
  124.25      struct vcpu *v = current;
  124.26  
  124.27 -    ASSERT(is_idle_task(v->domain));
  124.28 -    percpu_ctxt[smp_processor_id()].curr_vcpu = v;
  124.29 -    cpu_set(smp_processor_id(), v->domain->cpumask);
  124.30 -    v->arch.schedule_tail = continue_idle_task;
  124.31 +    ASSERT(is_idle_vcpu(v));
  124.32 +    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
  124.33 +    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
  124.34  
  124.35      reset_stack_and_jump(idle_loop);
  124.36  }
  124.37 @@ -217,14 +215,20 @@ struct vcpu *alloc_vcpu_struct(struct do
  124.38  
  124.39      memset(v, 0, sizeof(*v));
  124.40  
  124.41 -    memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
  124.42 +    memcpy(&v->arch, &idle_vcpu[0]->arch, sizeof(v->arch));
  124.43      v->arch.flags = TF_kernel_mode;
  124.44  
  124.45 +    if ( is_idle_domain(d) )
  124.46 +    {
  124.47 +        percpu_ctxt[vcpu_id].curr_vcpu = v;
  124.48 +        v->arch.schedule_tail = continue_idle_domain;
  124.49 +    }
  124.50 +
  124.51      if ( (v->vcpu_id = vcpu_id) != 0 )
  124.52      {
  124.53          v->arch.schedule_tail  = d->vcpu[0]->arch.schedule_tail;
  124.54          v->arch.perdomain_ptes =
  124.55 -            d->arch.mm_perdomain_pt + (vcpu_id << PDPT_VCPU_SHIFT);
  124.56 +            d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT);
  124.57      }
  124.58  
  124.59      return v;
  124.60 @@ -259,32 +263,11 @@ int arch_do_createdomain(struct vcpu *v)
  124.61      int i;
  124.62  #endif
  124.63  
  124.64 -    if ( is_idle_task(d) )
  124.65 -        return 0;
  124.66 -
  124.67 -    d->arch.ioport_caps = 
  124.68 -        rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
  124.69 -    if ( d->arch.ioport_caps == NULL )
  124.70 -        return -ENOMEM;
  124.71 -
  124.72 -    if ( (d->shared_info = alloc_xenheap_page()) == NULL )
  124.73 -        return -ENOMEM;
  124.74 -
  124.75 -    if ( (rc = ptwr_init(d)) != 0 )
  124.76 -    {
  124.77 -        free_xenheap_page(d->shared_info);
  124.78 -        return rc;
  124.79 -    }
  124.80 -
  124.81 -    v->arch.schedule_tail = continue_nonidle_task;
  124.82 -
  124.83 -    memset(d->shared_info, 0, PAGE_SIZE);
  124.84 -    v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
  124.85 -    v->cpumap = CPUMAP_RUNANYWHERE;
  124.86 -    SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
  124.87 -
  124.88      pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
  124.89      d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
  124.90 +    if ( d->arch.mm_perdomain_pt == NULL )
  124.91 +        goto fail_nomem;
  124.92 +
  124.93      memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
  124.94      v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
  124.95  
  124.96 @@ -297,49 +280,73 @@ int arch_do_createdomain(struct vcpu *v)
  124.97       */
  124.98      gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
  124.99      for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
 124.100 -        d->arch.mm_perdomain_pt[
 124.101 -            (vcpuid << PDPT_VCPU_SHIFT) + FIRST_RESERVED_GDT_PAGE] = gdt_l1e;
 124.102 +        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
 124.103 +                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
 124.104  
 124.105      v->arch.guest_vtable  = __linear_l2_table;
 124.106      v->arch.shadow_vtable = __shadow_linear_l2_table;
 124.107  
 124.108 -#ifdef __x86_64__
 124.109 +#if defined(__i386__)
 124.110 +
 124.111 +    mapcache_init(d);
 124.112 +
 124.113 +#else /* __x86_64__ */
 124.114 +
 124.115      v->arch.guest_vl3table = __linear_l3_table;
 124.116      v->arch.guest_vl4table = __linear_l4_table;
 124.117  
 124.118      d->arch.mm_perdomain_l2 = alloc_xenheap_page();
 124.119 +    d->arch.mm_perdomain_l3 = alloc_xenheap_page();
 124.120 +    if ( (d->arch.mm_perdomain_l2 == NULL) ||
 124.121 +         (d->arch.mm_perdomain_l3 == NULL) )
 124.122 +        goto fail_nomem;
 124.123 +
 124.124      memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
 124.125      for ( i = 0; i < (1 << pdpt_order); i++ )
 124.126          d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
 124.127              l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
 124.128                            __PAGE_HYPERVISOR);
 124.129  
 124.130 -    d->arch.mm_perdomain_l3 = alloc_xenheap_page();
 124.131      memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
 124.132      d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
 124.133          l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
 124.134                              __PAGE_HYPERVISOR);
 124.135 -#endif
 124.136 +
 124.137 +#endif /* __x86_64__ */
 124.138  
 124.139      shadow_lock_init(d);
 124.140      INIT_LIST_HEAD(&d->arch.free_shadow_frames);
 124.141  
 124.142 -    return 0;
 124.143 -}
 124.144 +    if ( !is_idle_domain(d) )
 124.145 +    {
 124.146 +        d->arch.ioport_caps = 
 124.147 +            rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
 124.148 +        if ( d->arch.ioport_caps == NULL )
 124.149 +            goto fail_nomem;
 124.150  
 124.151 -void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
 124.152 -{
 124.153 -    if ( v->processor == newcpu )
 124.154 -        return;
 124.155 +        if ( (d->shared_info = alloc_xenheap_page()) == NULL )
 124.156 +            goto fail_nomem;
 124.157 +
 124.158 +        if ( (rc = ptwr_init(d)) != 0 )
 124.159 +            goto fail_nomem;
 124.160  
 124.161 -    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
 124.162 -    v->processor = newcpu;
 124.163 +        memset(d->shared_info, 0, PAGE_SIZE);
 124.164 +        v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
 124.165 +        SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
 124.166 +
 124.167 +        v->arch.schedule_tail = continue_nonidle_domain;
 124.168 +    }
 124.169 +
 124.170 +    return 0;
 124.171  
 124.172 -    if ( VMX_DOMAIN(v) )
 124.173 -    {
 124.174 -        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
 124.175 -        v->arch.schedule_tail = arch_vmx_do_relaunch;
 124.176 -    }
 124.177 + fail_nomem:
 124.178 +    free_xenheap_page(d->shared_info);
 124.179 +#ifdef __x86_64__
 124.180 +    free_xenheap_page(d->arch.mm_perdomain_l2);
 124.181 +    free_xenheap_page(d->arch.mm_perdomain_l3);
 124.182 +#endif
 124.183 +    free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order);
 124.184 +    return -ENOMEM;
 124.185  }
 124.186  
 124.187  /* This is called by arch_final_setup_guest and do_boot_vcpu */
 124.188 @@ -473,14 +480,6 @@ void new_thread(struct vcpu *d,
 124.189  
 124.190  #ifdef __x86_64__
 124.191  
 124.192 -void toggle_guest_mode(struct vcpu *v)
 124.193 -{
 124.194 -    v->arch.flags ^= TF_kernel_mode;
 124.195 -    __asm__ __volatile__ ( "swapgs" );
 124.196 -    update_pagetables(v);
 124.197 -    write_ptbase(v);
 124.198 -}
 124.199 -
 124.200  #define loadsegment(seg,value) ({               \
 124.201      int __r = 1;                                \
 124.202      __asm__ __volatile__ (                      \
 124.203 @@ -650,35 +649,6 @@ static void save_segments(struct vcpu *v
 124.204      percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 124.205  }
 124.206  
 124.207 -long do_switch_to_user(void)
 124.208 -{
 124.209 -    struct cpu_user_regs  *regs = guest_cpu_user_regs();
 124.210 -    struct switch_to_user  stu;
 124.211 -    struct vcpu    *v = current;
 124.212 -
 124.213 -    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
 124.214 -         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
 124.215 -        return -EFAULT;
 124.216 -
 124.217 -    toggle_guest_mode(v);
 124.218 -
 124.219 -    regs->rip    = stu.rip;
 124.220 -    regs->cs     = stu.cs | 3; /* force guest privilege */
 124.221 -    regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
 124.222 -    regs->rsp    = stu.rsp;
 124.223 -    regs->ss     = stu.ss | 3; /* force guest privilege */
 124.224 -
 124.225 -    if ( !(stu.flags & VGCF_IN_SYSCALL) )
 124.226 -    {
 124.227 -        regs->entry_vector = 0;
 124.228 -        regs->r11 = stu.r11;
 124.229 -        regs->rcx = stu.rcx;
 124.230 -    }
 124.231 -
 124.232 -    /* Saved %rax gets written back to regs->rax in entry.S. */
 124.233 -    return stu.rax;
 124.234 -}
 124.235 -
 124.236  #define switch_kernel_stack(_n,_c) ((void)0)
 124.237  
 124.238  #elif defined(__i386__)
 124.239 @@ -705,7 +675,10 @@ static void __context_switch(void)
 124.240      struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
 124.241      struct vcpu          *n = current;
 124.242  
 124.243 -    if ( !is_idle_task(p->domain) )
 124.244 +    ASSERT(p != n);
 124.245 +    ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
 124.246 +
 124.247 +    if ( !is_idle_vcpu(p) )
 124.248      {
 124.249          memcpy(&p->arch.guest_context.user_regs,
 124.250                 stack_regs,
 124.251 @@ -714,7 +687,7 @@ static void __context_switch(void)
 124.252          save_segments(p);
 124.253      }
 124.254  
 124.255 -    if ( !is_idle_task(n->domain) )
 124.256 +    if ( !is_idle_vcpu(n) )
 124.257      {
 124.258          memcpy(stack_regs,
 124.259                 &n->arch.guest_context.user_regs,
 124.260 @@ -740,7 +713,8 @@ static void __context_switch(void)
 124.261      }
 124.262  
 124.263      if ( p->domain != n->domain )
 124.264 -        cpu_set(cpu, n->domain->cpumask);
 124.265 +        cpu_set(cpu, n->domain->domain_dirty_cpumask);
 124.266 +    cpu_set(cpu, n->vcpu_dirty_cpumask);
 124.267  
 124.268      write_ptbase(n);
 124.269  
 124.270 @@ -753,7 +727,8 @@ static void __context_switch(void)
 124.271      }
 124.272  
 124.273      if ( p->domain != n->domain )
 124.274 -        cpu_clear(cpu, p->domain->cpumask);
 124.275 +        cpu_clear(cpu, p->domain->domain_dirty_cpumask);
 124.276 +    cpu_clear(cpu, p->vcpu_dirty_cpumask);
 124.277  
 124.278      percpu_ctxt[cpu].curr_vcpu = n;
 124.279  }
 124.280 @@ -762,29 +737,32 @@ static void __context_switch(void)
 124.281  void context_switch(struct vcpu *prev, struct vcpu *next)
 124.282  {
 124.283      unsigned int cpu = smp_processor_id();
 124.284 +    cpumask_t dirty_mask = next->vcpu_dirty_cpumask;
 124.285  
 124.286 -    ASSERT(!local_irq_is_enabled());
 124.287 +    ASSERT(local_irq_is_enabled());
 124.288 +
 124.289 +    /* Allow at most one CPU at a time to be dirty. */
 124.290 +    ASSERT(cpus_weight(dirty_mask) <= 1);
 124.291 +    if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) )
 124.292 +    {
  124.293 +        /* Other CPUs call __sync_lazy_execstate from the flush IPI handler. */
 124.294 +        flush_tlb_mask(dirty_mask);
 124.295 +    }
 124.296 +
 124.297 +    local_irq_disable();
 124.298  
 124.299      set_current(next);
 124.300  
 124.301 -    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
 124.302 +    if ( (percpu_ctxt[cpu].curr_vcpu == next) || is_idle_vcpu(next) )
 124.303 +    {
 124.304 +        local_irq_enable();
 124.305 +    }
 124.306 +    else
 124.307      {
 124.308          __context_switch();
 124.309 -        percpu_ctxt[cpu].context_not_finalised = 1;
 124.310 -    }
 124.311 -}
 124.312  
 124.313 -void context_switch_finalise(struct vcpu *next)
 124.314 -{
 124.315 -    unsigned int cpu = smp_processor_id();
 124.316 -
 124.317 -    ASSERT(local_irq_is_enabled());
 124.318 -
 124.319 -    if ( percpu_ctxt[cpu].context_not_finalised )
 124.320 -    {
 124.321 -        percpu_ctxt[cpu].context_not_finalised = 0;
 124.322 -
 124.323 -        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
 124.324 +        /* Re-enable interrupts before restoring state which may fault. */
 124.325 +        local_irq_enable();
 124.326  
 124.327          if ( VMX_DOMAIN(next) )
 124.328          {
 124.329 @@ -798,6 +776,8 @@ void context_switch_finalise(struct vcpu
 124.330          }
 124.331      }
 124.332  
 124.333 +    context_saved(prev);
 124.334 +
 124.335      schedule_tail(next);
 124.336      BUG();
 124.337  }
 124.338 @@ -827,20 +807,11 @@ int __sync_lazy_execstate(void)
 124.339  
 124.340  void sync_vcpu_execstate(struct vcpu *v)
 124.341  {
 124.342 -    unsigned int cpu = v->processor;
 124.343 -
 124.344 -    if ( !cpu_isset(cpu, v->domain->cpumask) )
 124.345 -        return;
 124.346 +    if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
 124.347 +        (void)__sync_lazy_execstate();
 124.348  
 124.349 -    if ( cpu == smp_processor_id() )
 124.350 -    {
 124.351 -        (void)__sync_lazy_execstate();
 124.352 -    }
 124.353 -    else
 124.354 -    {
 124.355 -        /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
 124.356 -        flush_tlb_mask(cpumask_of_cpu(cpu));
 124.357 -    }
  124.358 +    /* Other CPUs call __sync_lazy_execstate from the flush IPI handler. */
 124.359 +    flush_tlb_mask(v->vcpu_dirty_cpumask);
 124.360  }
 124.361  
 124.362  unsigned long __hypercall_create_continuation(
 124.363 @@ -966,7 +937,7 @@ void domain_relinquish_resources(struct 
 124.364      struct vcpu *v;
 124.365      unsigned long pfn;
 124.366  
 124.367 -    BUG_ON(!cpus_empty(d->cpumask));
 124.368 +    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 124.369  
 124.370      ptwr_destroy(d);
 124.371  
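
The single per-domain cpumask splits into domain_dirty_cpumask and a per-vcpu vcpu_dirty_cpumask, and context_switch() now enforces the invariant that a vcpu's register state may be lazily resident on at most one physical CPU: any other CPU still holding it is flushed (it saves the state in its flush-IPI handler) before the switch proceeds, with interrupts re-enabled for the faultable restore path. The check, isolated as a sketch using the same helpers as the code above:

    /* Sketch of the invariant enforced above: at most one CPU may
     * hold next's state; flush any other holder before running here. */
    static void sync_dirty_state(struct vcpu *next, unsigned int cpu)
    {
        cpumask_t dirty = next->vcpu_dirty_cpumask;

        ASSERT(cpus_weight(dirty) <= 1);
        if (!cpu_isset(cpu, dirty) && !cpus_empty(dirty))
            flush_tlb_mask(dirty);  /* remote saves state in IPI handler */
    }
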
   125.1 --- a/xen/arch/x86/domain_build.c	Fri Jan 13 10:38:44 2006 -0600
   125.2 +++ b/xen/arch/x86/domain_build.c	Fri Jan 13 14:12:24 2006 -0600
   125.3 @@ -366,27 +366,20 @@ int construct_dom0(struct domain *d,
   125.4          l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
   125.5              l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
   125.6      }
   125.7 -    {
   125.8 -        unsigned long va;
   125.9 -        for (va = PERDOMAIN_VIRT_START; va < PERDOMAIN_VIRT_END;
  125.10 -             va += (1 << L2_PAGETABLE_SHIFT)) {
  125.11 -            l2tab[va >> L2_PAGETABLE_SHIFT] =
  125.12 -                l2e_from_paddr(__pa(d->arch.mm_perdomain_pt) +
  125.13 -                               (va-PERDOMAIN_VIRT_START),
  125.14 -                               __PAGE_HYPERVISOR);
  125.15 -        }
  125.16 -    }
  125.17      v->arch.guest_table = mk_pagetable((unsigned long)l3start);
  125.18  #else
  125.19      l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
  125.20      memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
  125.21      l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  125.22          l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
  125.23 -    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
  125.24 -        l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
  125.25      v->arch.guest_table = mk_pagetable((unsigned long)l2start);
  125.26  #endif
  125.27  
  125.28 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  125.29 +        l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
  125.30 +            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
  125.31 +                          __PAGE_HYPERVISOR);
  125.32 +
  125.33      l2tab += l2_linear_offset(dsi.v_start);
  125.34      mfn = alloc_spfn;
  125.35      for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
   126.1 --- a/xen/arch/x86/idle0_task.c	Fri Jan 13 10:38:44 2006 -0600
   126.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
   126.3 @@ -1,27 +0,0 @@
   126.4 -
   126.5 -#include <xen/config.h>
   126.6 -#include <xen/sched.h>
   126.7 -#include <asm/desc.h>
   126.8 -
   126.9 -struct domain idle0_domain = {
  126.10 -    domain_id:   IDLE_DOMAIN_ID,
  126.11 -    domain_flags:DOMF_idle_domain,
  126.12 -    refcnt:      ATOMIC_INIT(1)
  126.13 -};
  126.14 -
  126.15 -struct vcpu idle0_vcpu = {
  126.16 -    processor:   0,
  126.17 -    domain:      &idle0_domain
  126.18 -};
  126.19 -
  126.20 -struct tss_struct init_tss[NR_CPUS];
  126.21 -
  126.22 -/*
  126.23 - * Local variables:
  126.24 - * mode: C
  126.25 - * c-set-style: "BSD"
  126.26 - * c-basic-offset: 4
  126.27 - * tab-width: 4
  126.28 - * indent-tabs-mode: nil
  126.29 - * End:
  126.30 - */
   127.1 --- a/xen/arch/x86/io_apic.c	Fri Jan 13 10:38:44 2006 -0600
   127.2 +++ b/xen/arch/x86/io_apic.c	Fri Jan 13 14:12:24 2006 -0600
   127.3 @@ -1807,3 +1807,47 @@ int ioapic_guest_write(int apicid, int a
   127.4  
   127.5      return 0;
   127.6  }
   127.7 +
   127.8 +void dump_ioapic_irq_info(void)
   127.9 +{
  127.10 +    struct irq_pin_list *entry;
  127.11 +    struct IO_APIC_route_entry rte;
  127.12 +    unsigned int irq, pin, printed = 0;
  127.13 +    unsigned long flags;
  127.14 +
  127.15 +    for ( irq = 0; irq < NR_IRQS; irq++ )
  127.16 +    {
  127.17 +        entry = &irq_2_pin[irq];
  127.18 +        if ( entry->pin == -1 )
  127.19 +            continue;
  127.20 +
  127.21 +        if ( !printed++ )
  127.22 +            printk("IO-APIC interrupt information:\n");
  127.23 +
  127.24 +        printk("    IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
  127.25 +
  127.26 +        for ( ; ; )
  127.27 +        {
  127.28 +            pin = entry->pin;
  127.29 +
  127.30 +            printk("      Apic 0x%02x, Pin %2d: ", entry->apic, pin);
  127.31 +
  127.32 +            spin_lock_irqsave(&ioapic_lock, flags);
  127.33 +            *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
  127.34 +            *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
  127.35 +            spin_unlock_irqrestore(&ioapic_lock, flags);
  127.36 +
  127.37 +            printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
  127.38 +                   "delivery_status=%d, polarity=%d, irr=%d, "
  127.39 +                   "trigger=%s, mask=%d\n",
  127.40 +                   rte.vector, rte.delivery_mode,
  127.41 +                   rte.dest_mode ? "logical" : "physical",
  127.42 +                   rte.delivery_status, rte.polarity, rte.irr,
  127.43 +                   rte.trigger ? "level" : "edge", rte.mask);
  127.44 +
  127.45 +            if ( entry->next == 0 )
  127.46 +                break;
  127.47 +            entry = &irq_2_pin[entry->next];
  127.48 +        }
  127.49 +    }
  127.50 +}
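
Each redirection-table entry is 64 bits wide but the IO-APIC register window is 32 bits, which is why the dump above reads registers 0x10 + 2*pin and 0x11 + 2*pin under ioapic_lock and reassembles them into one entry. The reassembly, isolated (helper name is illustrative):

    /* RTE n lives in 32-bit registers 0x10+2n (low) and 0x11+2n (high);
     * the caller must hold ioapic_lock across both reads. */
    static u64 read_rte(unsigned int apic, unsigned int pin)
    {
        u32 lo = io_apic_read(apic, 0x10 + 2 * pin);
        u32 hi = io_apic_read(apic, 0x11 + 2 * pin);
        return ((u64)hi << 32) | lo;
    }
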
   128.1 --- a/xen/arch/x86/irq.c	Fri Jan 13 10:38:44 2006 -0600
   128.2 +++ b/xen/arch/x86/irq.c	Fri Jan 13 14:12:24 2006 -0600
   128.3 @@ -12,6 +12,7 @@
   128.4  #include <xen/irq.h>
   128.5  #include <xen/perfc.h>
   128.6  #include <xen/sched.h>
   128.7 +#include <xen/keyhandler.h>
   128.8  #include <asm/current.h>
   128.9  #include <asm/smpboot.h>
  128.10  
  128.11 @@ -198,15 +199,21 @@ int pirq_guest_unmask(struct domain *d)
  128.12  
  128.13  int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
  128.14  {
  128.15 -    unsigned int        vector = irq_to_vector(irq);
  128.16 -    irq_desc_t         *desc = &irq_desc[vector];
  128.17 +    unsigned int        vector;
  128.18 +    irq_desc_t         *desc;
  128.19      irq_guest_action_t *action;
  128.20      unsigned long       flags;
  128.21      int                 rc = 0;
  128.22      cpumask_t           cpumask = CPU_MASK_NONE;
  128.23  
  128.24 +    if ( (irq < 0) || (irq >= NR_IRQS) )
  128.25 +        return -EINVAL;
  128.26 +
  128.27 +    vector = irq_to_vector(irq);
  128.28      if ( vector == 0 )
  128.29 -        return -EBUSY;
  128.30 +        return -EINVAL;
  128.31 +
  128.32 +    desc = &irq_desc[vector];
  128.33  
  128.34      spin_lock_irqsave(&desc->lock, flags);
  128.35  
  128.36 @@ -305,3 +312,71 @@ int pirq_guest_unbind(struct domain *d, 
  128.37      spin_unlock_irqrestore(&desc->lock, flags);    
  128.38      return 0;
  128.39  }
  128.40 +
  128.41 +extern void dump_ioapic_irq_info(void);
  128.42 +
  128.43 +static void dump_irqs(unsigned char key)
  128.44 +{
  128.45 +    int i, irq, vector;
  128.46 +    irq_desc_t *desc;
  128.47 +    irq_guest_action_t *action;
  128.48 +    struct domain *d;
  128.49 +    unsigned long flags;
  128.50 +
  128.51 +    printk("Guest interrupt information:\n");
  128.52 +
  128.53 +    for ( irq = 0; irq < NR_IRQS; irq++ )
  128.54 +    {
  128.55 +        vector = irq_to_vector(irq);
  128.56 +        if ( vector == 0 )
  128.57 +            continue;
  128.58 +
  128.59 +        desc = &irq_desc[vector];
  128.60 +
  128.61 +        spin_lock_irqsave(&desc->lock, flags);
  128.62 +
  128.63 +        if ( desc->status & IRQ_GUEST )
  128.64 +        {
  128.65 +            action = (irq_guest_action_t *)desc->action;
  128.66 +
  128.67 +            printk("    IRQ%3d Vec%3d: type=%-15s status=%08x "
  128.68 +                   "in-flight=%d domain-list=",
  128.69 +                   irq, vector, desc->handler->typename,
  128.70 +                   desc->status, action->in_flight);
  128.71 +
  128.72 +            for ( i = 0; i < action->nr_guests; i++ )
  128.73 +            {
  128.74 +                d = action->guest[i];
  128.75 +                printk("%u(%c%c%c%c)",
  128.76 +                       d->domain_id,
  128.77 +                       (test_bit(d->pirq_to_evtchn[irq],
  128.78 +                                 &d->shared_info->evtchn_pending[0]) ?
  128.79 +                        'P' : '-'),
  128.80 +                       (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
  128.81 +                                 &d->shared_info->vcpu_info[0].
  128.82 +                                 evtchn_pending_sel) ?
  128.83 +                        'S' : '-'),
  128.84 +                       (test_bit(d->pirq_to_evtchn[irq],
  128.85 +                                 &d->shared_info->evtchn_mask[0]) ?
  128.86 +                        'M' : '-'),
  128.87 +                       (test_bit(irq, &d->pirq_mask) ?
  128.88 +                        'M' : '-'));
  128.89 +                if ( i != action->nr_guests )
  128.90 +                    printk(",");
  128.91 +            }
  128.92 +
  128.93 +            printk("\n");
  128.94 +        }
  128.95 +
  128.96 +        spin_unlock_irqrestore(&desc->lock, flags);
  128.97 +    }
  128.98 +
  128.99 +    dump_ioapic_irq_info();
 128.100 +}
 128.101 +
 128.102 +static int __init setup_dump_irqs(void)
 128.103 +{
 128.104 +    register_keyhandler('i', dump_irqs, "dump interrupt bindings");
 128.105 +    return 0;
 128.106 +}
 128.107 +__initcall(setup_dump_irqs);
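
The new 'i' debug key follows the standard three-part pattern: a handler taking the key, a register_keyhandler() call, and an __initcall to wire it up at boot. A minimal template for adding another key the same way; the key letter and body are placeholders:

    static void dump_example(unsigned char key)
    {
        printk("'%c' pressed: dump example state here\n", key);
    }

    static int __init setup_dump_example(void)
    {
        register_keyhandler('x', dump_example, "dump example state");
        return 0;
    }
    __initcall(setup_dump_example);
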
   129.1 --- a/xen/arch/x86/mm.c	Fri Jan 13 10:38:44 2006 -0600
   129.2 +++ b/xen/arch/x86/mm.c	Fri Jan 13 14:12:24 2006 -0600
   129.3 @@ -297,7 +297,6 @@ int map_ldt_shadow_page(unsigned int off
   129.4  
   129.5  #if defined(__x86_64__)
   129.6      /* If in user mode, switch to kernel mode just to read LDT mapping. */
   129.7 -    extern void toggle_guest_mode(struct vcpu *);
   129.8      int user_mode = !(v->arch.flags & TF_kernel_mode);
   129.9  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
  129.10  #elif defined(__i386__)
  129.11 @@ -841,10 +840,11 @@ static int alloc_l2_table(struct pfn_inf
  129.12             L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
  129.13      pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
  129.14          l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
  129.15 -    pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
  129.16 -        l2e_from_page(
  129.17 -            virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
  129.18 -            __PAGE_HYPERVISOR);
  129.19 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  129.20 +        pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  129.21 +            l2e_from_page(
  129.22 +                virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
  129.23 +                __PAGE_HYPERVISOR);
  129.24  #endif
  129.25  
  129.26      unmap_domain_page(pl2e);
  129.27 @@ -1457,7 +1457,8 @@ int get_page_type(struct pfn_info *page,
  129.28                       * was GDT/LDT) but those circumstances should be
  129.29                       * very rare.
  129.30                       */
  129.31 -                    cpumask_t mask = page_get_owner(page)->cpumask;
  129.32 +                    cpumask_t mask =
  129.33 +                        page_get_owner(page)->domain_dirty_cpumask;
  129.34                      tlbflush_filter(mask, page->tlbflush_timestamp);
  129.35  
  129.36                      if ( unlikely(!cpus_empty(mask)) )
  129.37 @@ -1619,7 +1620,7 @@ static void process_deferred_ops(unsigne
  129.38          if ( shadow_mode_enabled(d) )
  129.39              shadow_sync_all(d);
  129.40          if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
  129.41 -            flush_tlb_mask(d->cpumask);
  129.42 +            flush_tlb_mask(d->domain_dirty_cpumask);
  129.43          else
  129.44              local_flush_tlb();
  129.45      }
  129.46 @@ -1691,7 +1692,7 @@ static inline cpumask_t vcpumask_to_pcpu
  129.47      struct domain *d, unsigned long vmask)
  129.48  {
  129.49      unsigned int vcpu_id;
  129.50 -    cpumask_t    pmask;
  129.51 +    cpumask_t    pmask = CPU_MASK_NONE;
  129.52      struct vcpu *v;
  129.53  
  129.54      while ( vmask != 0 )
  129.55 @@ -1700,7 +1701,7 @@ static inline cpumask_t vcpumask_to_pcpu
  129.56          vmask &= ~(1UL << vcpu_id);
  129.57          if ( (vcpu_id < MAX_VIRT_CPUS) &&
  129.58               ((v = d->vcpu[vcpu_id]) != NULL) )
  129.59 -            cpu_set(v->processor, pmask);
  129.60 +            cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
  129.61      }
  129.62  
  129.63      return pmask;
  129.64 @@ -1869,7 +1870,6 @@ int do_mmuext_op(
  129.65                  break;
  129.66              }
  129.67              pmask = vcpumask_to_pcpumask(d, vmask);
  129.68 -            cpus_and(pmask, pmask, d->cpumask);
  129.69              if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
  129.70                  flush_tlb_mask(pmask);
  129.71              else
  129.72 @@ -1878,11 +1878,11 @@ int do_mmuext_op(
  129.73          }
  129.74  
  129.75          case MMUEXT_TLB_FLUSH_ALL:
  129.76 -            flush_tlb_mask(d->cpumask);
  129.77 +            flush_tlb_mask(d->domain_dirty_cpumask);
  129.78              break;
  129.79      
  129.80          case MMUEXT_INVLPG_ALL:
  129.81 -            flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
  129.82 +            flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
  129.83              break;
  129.84  
  129.85          case MMUEXT_FLUSH_CACHE:
  129.86 @@ -2497,7 +2497,7 @@ int do_update_va_mapping(unsigned long v
  129.87      l1_pgentry_t   val = l1e_from_intpte(val64);
  129.88      struct vcpu   *v   = current;
  129.89      struct domain *d   = v->domain;
  129.90 -    unsigned int   cpu = v->processor;
  129.91 +    unsigned int   cpu = smp_processor_id();
  129.92      unsigned long  vmask, bmap_ptr;
  129.93      cpumask_t      pmask;
  129.94      int            rc  = 0;
  129.95 @@ -2548,13 +2548,12 @@ int do_update_va_mapping(unsigned long v
  129.96              local_flush_tlb();
  129.97              break;
  129.98          case UVMF_ALL:
  129.99 -            flush_tlb_mask(d->cpumask);
 129.100 +            flush_tlb_mask(d->domain_dirty_cpumask);
 129.101              break;
 129.102          default:
 129.103              if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
 129.104                  rc = -EFAULT;
 129.105              pmask = vcpumask_to_pcpumask(d, vmask);
 129.106 -            cpus_and(pmask, pmask, d->cpumask);
 129.107              flush_tlb_mask(pmask);
 129.108              break;
 129.109          }
 129.110 @@ -2569,13 +2568,12 @@ int do_update_va_mapping(unsigned long v
 129.111              local_flush_tlb_one(va);
 129.112              break;
 129.113          case UVMF_ALL:
 129.114 -            flush_tlb_one_mask(d->cpumask, va);
 129.115 +            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
 129.116              break;
 129.117          default:
 129.118              if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
 129.119                  rc = -EFAULT;
 129.120              pmask = vcpumask_to_pcpumask(d, vmask);
 129.121 -            cpus_and(pmask, pmask, d->cpumask);
 129.122              flush_tlb_one_mask(pmask, va);
 129.123              break;
 129.124          }
 129.125 @@ -2972,7 +2970,6 @@ void ptwr_flush(struct domain *d, const 
 129.126  
 129.127  #ifdef CONFIG_X86_64
 129.128      struct vcpu *v = current;
 129.129 -    extern void toggle_guest_mode(struct vcpu *);
 129.130      int user_mode = !(v->arch.flags & TF_kernel_mode);
 129.131  #endif
 129.132  
 129.133 @@ -3002,7 +2999,7 @@ void ptwr_flush(struct domain *d, const 
 129.134          BUG();
 129.135      }
 129.136      PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
 129.137 -                PTWR_PRINT_WHICH, ptep, pte.l1);
 129.138 +                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
 129.139      l1e_remove_flags(pte, _PAGE_RW);
 129.140  
 129.141      /* Write-protect the p.t. page in the guest page table. */
 129.142 @@ -3018,20 +3015,33 @@ void ptwr_flush(struct domain *d, const 
 129.143  
 129.144      /* Ensure that there are no stale writable mappings in any TLB. */
 129.145      /* NB. INVLPG is a serialising instruction: flushes pending updates. */
 129.146 -    flush_tlb_one_mask(d->cpumask, l1va);
 129.147 +    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
 129.148      PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
 129.149 -                PTWR_PRINT_WHICH, ptep, pte.l1);
 129.150 +                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
 129.151  
 129.152      /*
 129.153       * STEP 2. Validate any modified PTEs.
 129.154       */
 129.155  
 129.156 -    pl1e = d->arch.ptwr[which].pl1e;
 129.157 -    modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
 129.158 -    unmap_domain_page(pl1e);
 129.159 -    perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
 129.160 -    ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
 129.161 -    d->arch.ptwr[which].prev_nr_updates = modified;
 129.162 +    if ( likely(d == current->domain) )
 129.163 +    {
 129.164 +        pl1e = map_domain_page(l1e_get_pfn(pte));
 129.165 +        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
 129.166 +        unmap_domain_page(pl1e);
 129.167 +        perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
 129.168 +        ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
 129.169 +        d->arch.ptwr[which].prev_nr_updates = modified;
 129.170 +    }
 129.171 +    else
 129.172 +    {
 129.173 +        /*
  129.174 +         * Must make a temporary global mapping, since we are running in the
  129.175 +         * wrong address space and have no access to our own mapcache.
 129.176 +         */
 129.177 +        pl1e = map_domain_page_global(l1e_get_pfn(pte));
 129.178 +        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
 129.179 +        unmap_domain_page_global(pl1e);
 129.180 +    }
 129.181  
 129.182      /*
 129.183       * STEP 3. Reattach the L1 p.t. page into the current address space.
 129.184 @@ -3209,7 +3219,7 @@ int ptwr_do_page_fault(struct domain *d,
 129.185  {
 129.186      unsigned long    pfn;
 129.187      struct pfn_info *page;
 129.188 -    l1_pgentry_t     pte;
 129.189 +    l1_pgentry_t    *pl1e, pte;
 129.190      l2_pgentry_t    *pl2e, l2e;
 129.191      int              which, flags;
 129.192      unsigned long    l2_idx;
 129.193 @@ -3342,15 +3352,14 @@ int ptwr_do_page_fault(struct domain *d,
 129.194      if ( which == PTWR_PT_ACTIVE )
 129.195      {
 129.196          l2e_remove_flags(*pl2e, _PAGE_PRESENT);
 129.197 -        flush_tlb_mask(d->cpumask);
 129.198 +        flush_tlb_mask(d->domain_dirty_cpumask);
 129.199      }
 129.200      
 129.201      /* Temporarily map the L1 page, and make a copy of it. */
 129.202 -    d->arch.ptwr[which].pl1e = map_domain_page(pfn);
 129.203 -    memcpy(d->arch.ptwr[which].page,
 129.204 -           d->arch.ptwr[which].pl1e,
 129.205 -           L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
 129.206 -    
 129.207 +    pl1e = map_domain_page(pfn);
 129.208 +    memcpy(d->arch.ptwr[which].page, pl1e, PAGE_SIZE);
 129.209 +    unmap_domain_page(pl1e);
 129.210 +
 129.211      /* Finally, make the p.t. page writable by the guest OS. */
 129.212      l1e_add_flags(pte, _PAGE_RW);
 129.213      if ( unlikely(__put_user(pte.l1,
 129.214 @@ -3359,7 +3368,6 @@ int ptwr_do_page_fault(struct domain *d,
 129.215          MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
 129.216                  &linear_pg_table[l1_linear_offset(addr)]);
 129.217          /* Toss the writable pagetable state and crash. */
 129.218 -        unmap_domain_page(d->arch.ptwr[which].pl1e);
 129.219          d->arch.ptwr[which].l1va = 0;
 129.220          domain_crash(d);
 129.221          return 0;
 129.222 @@ -3369,7 +3377,7 @@ int ptwr_do_page_fault(struct domain *d,
 129.223  
 129.224   emulate:
 129.225      if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
 129.226 -                           &ptwr_mem_emulator, BITS_PER_LONG/8) )
 129.227 +                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
 129.228          return 0;
 129.229      perfc_incrc(ptwr_emulations);
 129.230      return EXCRET_fault_fixed;
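
/*
 * Sketch (illustration, not part of the changeset): the ptwr_flush()
 * hunks above pick the mapping primitive by context. map_domain_page()
 * uses a fast per-CPU mapcache that only exists in the current address
 * space; when flushing a foreign domain's page tables we are running
 * in the wrong address space, so the globally visible (and costlier)
 * map_domain_page_global() is required. The rule in miniature, using
 * the patch's own API (not standalone):
 */
static l1_pgentry_t *map_l1e_for_flush(struct domain *d, unsigned long pfn)
{
    if ( likely(d == current->domain) )
        return map_domain_page(pfn);        /* per-CPU mapcache slot */
    return map_domain_page_global(pfn);     /* visible in every address space */
}
/* Callers must pair with unmap_domain_page() or unmap_domain_page_global(). */
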
   130.1 --- a/xen/arch/x86/nmi.c	Fri Jan 13 10:38:44 2006 -0600
   130.2 +++ b/xen/arch/x86/nmi.c	Fri Jan 13 14:12:24 2006 -0600
   130.3 @@ -23,18 +23,20 @@
   130.4  #include <xen/sched.h>
   130.5  #include <xen/console.h>
   130.6  #include <xen/smp.h>
   130.7 +#include <xen/keyhandler.h>
   130.8  #include <asm/current.h>
   130.9  #include <asm/mc146818rtc.h>
  130.10  #include <asm/msr.h>
  130.11  #include <asm/mpspec.h>
  130.12  #include <asm/debugger.h>
  130.13  #include <asm/div64.h>
  130.14 +#include <asm/apic.h>
  130.15  
  130.16  unsigned int nmi_watchdog = NMI_NONE;
  130.17  static unsigned int nmi_hz = HZ;
  130.18  static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
  130.19  static unsigned int nmi_p4_cccr_val;
  130.20 -static struct ac_timer nmi_timer[NR_CPUS];
  130.21 +static struct timer nmi_timer[NR_CPUS];
  130.22  static unsigned int nmi_timer_ticks[NR_CPUS];
  130.23  
  130.24  /*
  130.25 @@ -132,7 +134,7 @@ static void nmi_timer_fn(void *unused)
  130.26  {
  130.27      int cpu = smp_processor_id();
  130.28      nmi_timer_ticks[cpu]++;
  130.29 -    set_ac_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
  130.30 +    set_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
  130.31  }
  130.32  
  130.33  static void disable_lapic_nmi_watchdog(void)
  130.34 @@ -308,8 +310,6 @@ static int __pminit setup_p4_watchdog(vo
  130.35  
  130.36  void __pminit setup_apic_nmi_watchdog(void)
  130.37  {
  130.38 -    int cpu = smp_processor_id();
  130.39 -
  130.40      if (!nmi_watchdog)
  130.41          return;
  130.42  
  130.43 @@ -344,49 +344,37 @@ void __pminit setup_apic_nmi_watchdog(vo
  130.44  
  130.45      lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
  130.46      nmi_active = 1;
  130.47 -
  130.48 -    init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
  130.49  }
  130.50  
  130.51  static unsigned int
  130.52  last_irq_sums [NR_CPUS],
  130.53      alert_counter [NR_CPUS];
  130.54  
  130.55 -static spinlock_t   watchdog_lock = SPIN_LOCK_UNLOCKED;
  130.56 -static unsigned int watchdog_disable_count = 1;
  130.57 -static unsigned int watchdog_on;
  130.58 +static atomic_t watchdog_disable_count = ATOMIC_INIT(1);
  130.59  
  130.60  void watchdog_disable(void)
  130.61  {
  130.62 -    unsigned long flags;
  130.63 -
  130.64 -    spin_lock_irqsave(&watchdog_lock, flags);
  130.65 -
  130.66 -    if ( watchdog_disable_count++ == 0 )
  130.67 -        watchdog_on = 0;
  130.68 -
  130.69 -    spin_unlock_irqrestore(&watchdog_lock, flags);
  130.70 +    atomic_inc(&watchdog_disable_count);
  130.71  }
  130.72  
  130.73  void watchdog_enable(void)
  130.74  {
  130.75 -    unsigned int  cpu;
  130.76 -    unsigned long flags;
  130.77 +    static unsigned long heartbeat_initialised;
  130.78 +    unsigned int cpu;
  130.79  
  130.80 -    spin_lock_irqsave(&watchdog_lock, flags);
  130.81 +    if ( !atomic_dec_and_test(&watchdog_disable_count) ||
  130.82 +         test_and_set_bit(0, &heartbeat_initialised) )
  130.83 +        return;
  130.84  
  130.85 -    if ( --watchdog_disable_count == 0 )
  130.86 +    /*
  130.87 +     * Activate periodic heartbeats. We cannot do this earlier during 
  130.88 +     * setup because the timer infrastructure is not available.
  130.89 +     */
  130.90 +    for_each_online_cpu ( cpu )
  130.91      {
  130.92 -        watchdog_on = 1;
  130.93 -        /*
  130.94 -         * Ensure periodic heartbeats are active. We cannot do this earlier
  130.95 -         * during setup because the timer infrastructure is not available. 
  130.96 -         */
  130.97 -        for_each_online_cpu ( cpu )
  130.98 -            set_ac_timer(&nmi_timer[cpu], NOW());
  130.99 +        init_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
 130.100 +        set_timer(&nmi_timer[cpu], NOW());
 130.101      }
 130.102 -
 130.103 -    spin_unlock_irqrestore(&watchdog_lock, flags);
 130.104  }
 130.105  
 130.106  void nmi_watchdog_tick(struct cpu_user_regs * regs)
 130.107 @@ -395,7 +383,7 @@ void nmi_watchdog_tick(struct cpu_user_r
 130.108  
 130.109      sum = nmi_timer_ticks[cpu];
 130.110  
 130.111 -    if ( (last_irq_sums[cpu] == sum) && watchdog_on )
 130.112 +    if ( (last_irq_sums[cpu] == sum) && !atomic_read(&watchdog_disable_count) )
 130.113      {
 130.114          /*
 130.115           * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds) 
 130.116 @@ -440,3 +428,29 @@ void nmi_watchdog_tick(struct cpu_user_r
 130.117          write_watchdog_counter(NULL);
 130.118      }
 130.119  }
 130.120 +
 130.121 +/*
  130.122 + * The 'self' destination shorthand is not valid when used with the
  130.123 + * NMI delivery mode (see Tables 8-3 and 8-4 in the IA-32 Software
  130.124 + * Developer's Manual, Volume 3). We therefore send the IPI to our
  130.125 + * own APIC ID explicitly, which is valid.
 130.126 + */
 130.127 +static void do_nmi_trigger(unsigned char key)
 130.128 +{
 130.129 +    u32 id = apic_read(APIC_ID);
 130.130 +
 130.131 +    printk("Triggering NMI on APIC ID %x\n", id);
 130.132 +
 130.133 +    local_irq_disable();
 130.134 +    apic_wait_icr_idle();
 130.135 +    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 130.136 +    apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_INT_ASSERT);
 130.137 +    local_irq_enable();
 130.138 +}
 130.139 +
 130.140 +static __init int register_nmi_trigger(void)
 130.141 +{
 130.142 +    register_keyhandler('n', do_nmi_trigger, "trigger an NMI");
 130.143 +    return 0;
 130.144 +}
 130.145 +__initcall(register_nmi_trigger);
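
/*
 * Sketch (illustration, not part of the changeset): the watchdog hunks
 * above replace a spinlock-protected count-plus-flag with one atomic
 * counter; the watchdog is live exactly when the count is zero, and the
 * one-time heartbeat setup is guarded separately by test_and_set_bit().
 * Standard C11 atomics stand in below for Xen's atomic_t helpers.
 */
#include <stdatomic.h>

static atomic_int disable_count = 1;   /* boots disabled, as in the patch */

void watchdog_disable(void) { atomic_fetch_add(&disable_count, 1); }
void watchdog_enable(void)  { atomic_fetch_sub(&disable_count, 1); }
int  watchdog_live(void)    { return atomic_load(&disable_count) == 0; }
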
   131.1 --- a/xen/arch/x86/setup.c	Fri Jan 13 10:38:44 2006 -0600
   131.2 +++ b/xen/arch/x86/setup.c	Fri Jan 13 14:12:24 2006 -0600
   131.3 @@ -81,6 +81,10 @@ extern void early_time_init(void);
   131.4  extern void initialize_keytable(void);
   131.5  extern void early_cpu_init(void);
   131.6  
   131.7 +struct tss_struct init_tss[NR_CPUS];
   131.8 +
   131.9 +struct vcpu *idle_vcpu[NR_CPUS];
  131.10 +
  131.11  extern unsigned long cpu0_stack[];
  131.12  
  131.13  struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
  131.14 @@ -92,8 +96,6 @@ unsigned long mmu_cr4_features = X86_CR4
  131.15  #endif
  131.16  EXPORT_SYMBOL(mmu_cr4_features);
  131.17  
  131.18 -struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
  131.19 -
  131.20  int acpi_disabled;
  131.21  
  131.22  int acpi_force;
  131.23 @@ -144,8 +146,8 @@ static struct e820entry e820_raw[E820MAX
  131.24  
  131.25  void __init __start_xen(multiboot_info_t *mbi)
  131.26  {
  131.27 -    unsigned long vgdt, gdt_pfn;
  131.28      char *cmdline;
  131.29 +    struct domain *idle_domain;
  131.30      unsigned long _initrd_start = 0, _initrd_len = 0;
  131.31      unsigned int initrdidx = 1;
  131.32      module_t *mod = (module_t *)__va(mbi->mods_addr);
  131.33 @@ -163,9 +165,8 @@ void __init __start_xen(multiboot_info_t
  131.34      if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
  131.35          cmdline_parse(__va(mbi->cmdline));
  131.36  
  131.37 -    /* Must do this early -- e.g., spinlocks rely on get_current(). */
  131.38 -    set_current(&idle0_vcpu);
  131.39 -    set_processor_id(0);
  131.40 +    set_current((struct vcpu *)0xfffff000); /* debug sanity */
  131.41 +    set_processor_id(0); /* needed early, for smp_processor_id() */
  131.42  
  131.43      smp_prepare_boot_cpu();
  131.44  
  131.45 @@ -343,6 +344,12 @@ void __init __start_xen(multiboot_info_t
  131.46      BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
  131.47      BUG_ON(sizeof(vcpu_info_t) != 64);
  131.48  
  131.49 +    /* __foo are defined in public headers. Check they match internal defs. */
  131.50 +    BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
  131.51 +#ifdef HYPERVISOR_VIRT_END
  131.52 +    BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
  131.53 +#endif
  131.54 +
  131.55      init_frametable();
  131.56  
  131.57      end_boot_allocator();
  131.58 @@ -376,6 +383,14 @@ void __init __start_xen(multiboot_info_t
  131.59  
  131.60      early_cpu_init();
  131.61  
  131.62 +    scheduler_init();
  131.63 +
  131.64 +    idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
  131.65 +    BUG_ON(idle_domain == NULL);
  131.66 +
  131.67 +    set_current(idle_domain->vcpu[0]);
  131.68 +    idle_vcpu[0] = current;
  131.69 +
  131.70      paging_init();
  131.71  
  131.72      /* Unmap the first page of CPU0's stack. */
  131.73 @@ -388,21 +403,6 @@ void __init __start_xen(multiboot_info_t
  131.74  
  131.75      sort_exception_tables();
  131.76  
  131.77 -    if ( arch_do_createdomain(current) != 0 )
  131.78 -        BUG();
  131.79 -
  131.80 -    /*
  131.81 -     * Map default GDT into its final positions in the idle page table. As
  131.82 -     * noted in arch_do_createdomain(), we must map for every possible VCPU#.
  131.83 -     */
  131.84 -    vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
  131.85 -    gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
  131.86 -    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  131.87 -    {
  131.88 -        map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
  131.89 -        vgdt += 1 << PDPT_VCPU_VA_SHIFT;
  131.90 -    }
  131.91 -
  131.92      find_smp_config();
  131.93  
  131.94      smp_alloc_memory();
  131.95 @@ -423,14 +423,12 @@ void __init __start_xen(multiboot_info_t
  131.96  
  131.97      trap_init();
  131.98  
  131.99 -    ac_timer_init();
 131.100 +    timer_init();
 131.101  
 131.102      early_time_init();
 131.103  
 131.104      arch_init_memory();
 131.105  
 131.106 -    scheduler_init();
 131.107 -
 131.108      identify_cpu(&boot_cpu_data);
 131.109      if ( cpu_has_fxsr )
 131.110          set_in_cr4(X86_CR4_OSFXSR);
 131.111 @@ -480,7 +478,8 @@ void __init __start_xen(multiboot_info_t
 131.112  
 131.113      schedulers_start();
 131.114  
 131.115 -    watchdog_enable();
 131.116 +    if ( opt_watchdog ) 
 131.117 +        watchdog_enable();
 131.118  
 131.119      shadow_mode_init();
 131.120  
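
/*
 * Sketch (illustration, not part of the changeset): two points in the
 * setup.c hunks above. Boot ordering changes so that scheduler_init()
 * and creation of the single idle domain precede paging_init(), with
 * 'current' poisoned to 0xfffff000 until the idle vcpu exists, so any
 * premature get_current() faults loudly. Separately, boot-time
 * BUG_ON()s verify that the public headers' __-prefixed copies of the
 * address-space constants match the internal definitions. That guard,
 * reduced to a standalone example (the constant value is invented):
 */
#include <assert.h>

#define HYPERVISOR_VIRT_START   0xFC000000UL  /* internal definition */
#define __HYPERVISOR_VIRT_START 0xFC000000UL  /* public-header copy  */

int main(void)
{
    assert(__HYPERVISOR_VIRT_START == HYPERVISOR_VIRT_START);
    return 0;
}
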
   132.1 --- a/xen/arch/x86/shadow.c	Fri Jan 13 10:38:44 2006 -0600
   132.2 +++ b/xen/arch/x86/shadow.c	Fri Jan 13 14:12:24 2006 -0600
   132.3 @@ -469,6 +469,7 @@ static unsigned long shadow_l2_table(
   132.4  {
   132.5      unsigned long smfn;
   132.6      l2_pgentry_t *spl2e;
   132.7 +    int i;
   132.8  
   132.9      SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
  132.10  
  132.11 @@ -503,9 +504,11 @@ static unsigned long shadow_l2_table(
  132.12          spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
  132.13              l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
  132.14  
  132.15 -        spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
  132.16 -            l2e_from_paddr(__pa(page_get_owner(pfn_to_page(gmfn))->arch.mm_perdomain_pt),
  132.17 -                            __PAGE_HYPERVISOR);
  132.18 +        for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  132.19 +            spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  132.20 +                l2e_from_page(virt_to_page(page_get_owner(pfn_to_page(gmfn))->
  132.21 +                                           arch.mm_perdomain_pt) + i,
  132.22 +                              __PAGE_HYPERVISOR);
  132.23  
  132.24          if ( shadow_mode_translate(d) ) // NB: not external
  132.25          {
  132.26 @@ -1800,7 +1803,7 @@ static void sync_all(struct domain *d)
  132.27      }
  132.28  
  132.29      /* Other VCPUs mustn't use the revoked writable mappings. */
  132.30 -    other_vcpus_mask = d->cpumask;
  132.31 +    other_vcpus_mask = d->domain_dirty_cpumask;
  132.32      cpu_clear(smp_processor_id(), other_vcpus_mask);
  132.33      flush_tlb_mask(other_vcpus_mask);
  132.34  
  132.35 @@ -2150,8 +2153,8 @@ static void shadow_update_pagetables(str
  132.36      if ( max_mode & (SHM_enable | SHM_external) )
  132.37      {
  132.38          if ( likely(v->arch.guest_vtable != NULL) )
  132.39 -            unmap_domain_page(v->arch.guest_vtable);
  132.40 -        v->arch.guest_vtable = map_domain_page(gmfn);
  132.41 +            unmap_domain_page_global(v->arch.guest_vtable);
  132.42 +        v->arch.guest_vtable = map_domain_page_global(gmfn);
  132.43      }
  132.44  
  132.45      /*
  132.46 @@ -2187,8 +2190,8 @@ static void shadow_update_pagetables(str
  132.47          )
  132.48      {
  132.49          if ( v->arch.shadow_vtable )
  132.50 -            unmap_domain_page(v->arch.shadow_vtable);
  132.51 -        v->arch.shadow_vtable = map_domain_page(smfn);
  132.52 +            unmap_domain_page_global(v->arch.shadow_vtable);
  132.53 +        v->arch.shadow_vtable = map_domain_page_global(smfn);
  132.54      }
  132.55  
  132.56  #if CONFIG_PAGING_LEVELS == 2
  132.57 @@ -2204,8 +2207,8 @@ static void shadow_update_pagetables(str
  132.58          if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
  132.59              hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
  132.60          if ( v->arch.hl2_vtable )
  132.61 -            unmap_domain_page(v->arch.hl2_vtable);
  132.62 -        v->arch.hl2_vtable = map_domain_page(hl2mfn);
  132.63 +            unmap_domain_page_global(v->arch.hl2_vtable);
  132.64 +        v->arch.hl2_vtable = map_domain_page_global(hl2mfn);
  132.65      }
  132.66  
  132.67      /*
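
/*
 * Sketch (illustration, not part of the changeset): shadow_l2_table()
 * above grows the per-domain mapping from one L2 slot to
 * PDPT_L2_ENTRIES consecutive slots, one per page of mm_perdomain_pt.
 * The underlying pattern -- N physically consecutive pages into N
 * consecutive L2 entries -- with invented names and a simplified
 * entry encoding:
 */
#include <stdint.h>

#define SLOTS 4   /* stand-in for PDPT_L2_ENTRIES; real value differs */

static void map_span(uint64_t *l2, unsigned int base_slot,
                     uint64_t first_pfn, uint64_t flags)
{
    for ( unsigned int i = 0; i < SLOTS; i++ )
        l2[base_slot + i] = ((first_pfn + i) << 12) | flags;
}
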
   133.1 --- a/xen/arch/x86/shadow32.c	Fri Jan 13 10:38:44 2006 -0600
   133.2 +++ b/xen/arch/x86/shadow32.c	Fri Jan 13 14:12:24 2006 -0600
   133.3 @@ -726,6 +726,7 @@ static void alloc_monitor_pagetable(stru
   133.4      l2_pgentry_t *mpl2e;
   133.5      struct pfn_info *mmfn_info;
   133.6      struct domain *d = v->domain;
   133.7 +    int i;
   133.8  
   133.9      ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
  133.10  
  133.11 @@ -733,16 +734,17 @@ static void alloc_monitor_pagetable(stru
  133.12      ASSERT(mmfn_info != NULL);
  133.13  
  133.14      mmfn = page_to_pfn(mmfn_info);
  133.15 -    mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
  133.16 +    mpl2e = (l2_pgentry_t *)map_domain_page_global(mmfn);
  133.17      memset(mpl2e, 0, PAGE_SIZE);
  133.18  
  133.19      memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  133.20             &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  133.21             HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
  133.22  
  133.23 -    mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
  133.24 -        l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
  133.25 -                        __PAGE_HYPERVISOR);
  133.26 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  133.27 +        mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  133.28 +            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
  133.29 +                          __PAGE_HYPERVISOR);
  133.30  
  133.31      // map the phys_to_machine map into the Read-Only MPT space for this domain
  133.32      mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
  133.33 @@ -794,7 +796,7 @@ void free_monitor_pagetable(struct vcpu 
  133.34       * Then free monitor_table.
  133.35       */
  133.36      mfn = pagetable_get_pfn(v->arch.monitor_table);
  133.37 -    unmap_domain_page(v->arch.monitor_vtable);
  133.38 +    unmap_domain_page_global(v->arch.monitor_vtable);
  133.39      free_domheap_page(pfn_to_page(mfn));
  133.40  
  133.41      v->arch.monitor_table = mk_pagetable(0);
  133.42 @@ -929,7 +931,7 @@ int __shadow_mode_enable(struct domain *
  133.43          if ( v->arch.guest_vtable &&
  133.44               (v->arch.guest_vtable != __linear_l2_table) )
  133.45          {
  133.46 -            unmap_domain_page(v->arch.guest_vtable);
  133.47 +            unmap_domain_page_global(v->arch.guest_vtable);
  133.48          }
  133.49          if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
  133.50              v->arch.guest_vtable = __linear_l2_table;
  133.51 @@ -942,7 +944,7 @@ int __shadow_mode_enable(struct domain *
  133.52          if ( v->arch.shadow_vtable &&
  133.53               (v->arch.shadow_vtable != __shadow_linear_l2_table) )
  133.54          {
  133.55 -            unmap_domain_page(v->arch.shadow_vtable);
  133.56 +            unmap_domain_page_global(v->arch.shadow_vtable);
  133.57          }
  133.58          if ( !(mode & SHM_external) )
  133.59              v->arch.shadow_vtable = __shadow_linear_l2_table;
  133.60 @@ -955,7 +957,7 @@ int __shadow_mode_enable(struct domain *
  133.61          if ( v->arch.hl2_vtable &&
  133.62               (v->arch.hl2_vtable != __linear_hl2_table) )
  133.63          {
  133.64 -            unmap_domain_page(v->arch.hl2_vtable);
  133.65 +            unmap_domain_page_global(v->arch.hl2_vtable);
  133.66          }
  133.67          if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
  133.68              v->arch.hl2_vtable = __linear_hl2_table;
  133.69 @@ -1508,6 +1510,7 @@ static unsigned long shadow_l2_table(
  133.70  {
  133.71      unsigned long smfn;
  133.72      l2_pgentry_t *spl2e;
  133.73 +    int i;
  133.74  
  133.75      SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
  133.76  
  133.77 @@ -1542,9 +1545,11 @@ static unsigned long shadow_l2_table(
  133.78          spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
  133.79              l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
  133.80  
  133.81 -        spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
  133.82 -            l2e_from_paddr(__pa(page_get_owner(pfn_to_page(gmfn))->arch.mm_perdomain_pt),
  133.83 -                            __PAGE_HYPERVISOR);
  133.84 +        for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  133.85 +            spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
   133.86 +                l2e_from_page(virt_to_page(page_get_owner(pfn_to_page(gmfn))->
   133.87 +                                           arch.mm_perdomain_pt) + i,
   133.88 +                              __PAGE_HYPERVISOR);
  133.89  
  133.90          if ( shadow_mode_translate(d) ) // NB: not external
  133.91          {
  133.92 @@ -2586,7 +2591,7 @@ void __shadow_sync_all(struct domain *d)
  133.93      }
  133.94  
  133.95      /* Other VCPUs mustn't use the revoked writable mappings. */
  133.96 -    other_vcpus_mask = d->cpumask;
  133.97 +    other_vcpus_mask = d->domain_dirty_cpumask;
  133.98      cpu_clear(smp_processor_id(), other_vcpus_mask);
  133.99      flush_tlb_mask(other_vcpus_mask);
 133.100  
 133.101 @@ -2906,8 +2911,8 @@ void __update_pagetables(struct vcpu *v)
 133.102      if ( max_mode & (SHM_enable | SHM_external) )
 133.103      {
 133.104          if ( likely(v->arch.guest_vtable != NULL) )
 133.105 -            unmap_domain_page(v->arch.guest_vtable);
 133.106 -        v->arch.guest_vtable = map_domain_page(gmfn);
 133.107 +            unmap_domain_page_global(v->arch.guest_vtable);
 133.108 +        v->arch.guest_vtable = map_domain_page_global(gmfn);
 133.109      }
 133.110  
 133.111      /*
 133.112 @@ -2932,8 +2937,8 @@ void __update_pagetables(struct vcpu *v)
 133.113      if ( max_mode == SHM_external )
 133.114      {
 133.115          if ( v->arch.shadow_vtable )
 133.116 -            unmap_domain_page(v->arch.shadow_vtable);
 133.117 -        v->arch.shadow_vtable = map_domain_page(smfn);
 133.118 +            unmap_domain_page_global(v->arch.shadow_vtable);
 133.119 +        v->arch.shadow_vtable = map_domain_page_global(smfn);
 133.120      }
 133.121  
 133.122      /*
 133.123 @@ -2948,8 +2953,8 @@ void __update_pagetables(struct vcpu *v)
 133.124          if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
 133.125              hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
 133.126          if ( v->arch.hl2_vtable )
 133.127 -            unmap_domain_page(v->arch.hl2_vtable);
 133.128 -        v->arch.hl2_vtable = map_domain_page(hl2mfn);
 133.129 +            unmap_domain_page_global(v->arch.hl2_vtable);
 133.130 +        v->arch.hl2_vtable = map_domain_page_global(hl2mfn);
 133.131      }
 133.132  
 133.133      /*
   134.1 --- a/xen/arch/x86/shadow_public.c	Fri Jan 13 10:38:44 2006 -0600
   134.2 +++ b/xen/arch/x86/shadow_public.c	Fri Jan 13 14:12:24 2006 -0600
   134.3 @@ -151,6 +151,8 @@ free_shadow_fl1_table(struct domain *d, 
   134.4  
   134.5      for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
   134.6          put_page_from_l1e(pl1e[i], d);
   134.7 +
   134.8 +    unmap_domain_page(pl1e);
   134.9  }
  134.10  
  134.11  /*
  134.12 @@ -254,6 +256,7 @@ static pagetable_t page_table_convert(st
  134.13      pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
  134.14      for (i = 0; i < PDP_ENTRIES; i++)
  134.15          l3[i] = l3e_from_pfn(l3e_get_pfn(pae_l3[i]), __PAGE_HYPERVISOR);
  134.16 +    unmap_domain_page(pae_l3);
  134.17  
  134.18      unmap_domain_page(l4);
  134.19      unmap_domain_page(l3);
  134.20 @@ -275,7 +278,7 @@ static void alloc_monitor_pagetable(stru
  134.21      ASSERT( mmfn_info );
  134.22  
  134.23      mmfn = page_to_pfn(mmfn_info);
  134.24 -    mpl4e = (l4_pgentry_t *) map_domain_page(mmfn);
  134.25 +    mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
  134.26      memcpy(mpl4e, &idle_pg_table[0], PAGE_SIZE);
  134.27      mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
  134.28          l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
  134.29 @@ -298,7 +301,7 @@ void free_monitor_pagetable(struct vcpu 
  134.30       * free monitor_table.
  134.31       */
  134.32      mfn = pagetable_get_pfn(v->arch.monitor_table);
  134.33 -    unmap_domain_page(v->arch.monitor_vtable);
  134.34 +    unmap_domain_page_global(v->arch.monitor_vtable);
  134.35      free_domheap_page(pfn_to_page(mfn));
  134.36  
  134.37      v->arch.monitor_table = mk_pagetable(0);
  134.38 @@ -325,6 +328,7 @@ static void alloc_monitor_pagetable(stru
  134.39      l2_pgentry_t *mpl2e;
  134.40      struct pfn_info *mmfn_info;
  134.41      struct domain *d = v->domain;
  134.42 +    int i;
  134.43  
  134.44      ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
  134.45  
  134.46 @@ -332,16 +336,17 @@ static void alloc_monitor_pagetable(stru
  134.47      ASSERT(mmfn_info != NULL);
  134.48  
  134.49      mmfn = page_to_pfn(mmfn_info);
  134.50 -    mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
  134.51 +    mpl2e = (l2_pgentry_t *)map_domain_page_global(mmfn);
  134.52      memset(mpl2e, 0, PAGE_SIZE);
  134.53  
  134.54      memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  134.55             &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  134.56             HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
  134.57  
  134.58 -    mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
  134.59 -        l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
  134.60 -                       __PAGE_HYPERVISOR);
  134.61 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  134.62 +        mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  134.63 +            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
  134.64 +                          __PAGE_HYPERVISOR);
  134.65  
  134.66      // map the phys_to_machine map into the Read-Only MPT space for this domain
  134.67      mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
  134.68 @@ -393,7 +398,7 @@ void free_monitor_pagetable(struct vcpu 
  134.69       * Then free monitor_table.
  134.70       */
  134.71      mfn = pagetable_get_pfn(v->arch.monitor_table);
  134.72 -    unmap_domain_page(v->arch.monitor_vtable);
  134.73 +    unmap_domain_page_global(v->arch.monitor_vtable);
  134.74      free_domheap_page(pfn_to_page(mfn));
  134.75  
  134.76      v->arch.monitor_table = mk_pagetable(0);
  134.77 @@ -977,7 +982,7 @@ int __shadow_mode_enable(struct domain *
  134.78          if ( v->arch.guest_vtable &&
  134.79               (v->arch.guest_vtable != __linear_l2_table) )
  134.80          {
  134.81 -            unmap_domain_page(v->arch.guest_vtable);
  134.82 +            unmap_domain_page_global(v->arch.guest_vtable);
  134.83          }
  134.84          if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
  134.85              v->arch.guest_vtable = __linear_l2_table;
  134.86 @@ -990,7 +995,7 @@ int __shadow_mode_enable(struct domain *
  134.87          if ( v->arch.shadow_vtable &&
  134.88               (v->arch.shadow_vtable != __shadow_linear_l2_table) )
  134.89          {
  134.90 -            unmap_domain_page(v->arch.shadow_vtable);
  134.91 +            unmap_domain_page_global(v->arch.shadow_vtable);
  134.92          }
  134.93          if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
  134.94              v->arch.shadow_vtable = __shadow_linear_l2_table;
  134.95 @@ -1004,7 +1009,7 @@ int __shadow_mode_enable(struct domain *
  134.96          if ( v->arch.hl2_vtable &&
  134.97               (v->arch.hl2_vtable != __linear_hl2_table) )
  134.98          {
  134.99 -            unmap_domain_page(v->arch.hl2_vtable);
 134.100 +            unmap_domain_page_global(v->arch.hl2_vtable);
 134.101          }
 134.102          if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
 134.103              v->arch.hl2_vtable = __linear_hl2_table;
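
/*
 * Sketch (illustration, not part of the changeset): two hunks above are
 * straight leak fixes restoring the pairing discipline -- every
 * map_domain_page() must be matched by unmap_domain_page() on every
 * path, or a mapcache slot is lost for good. The shape of the fixed
 * code, in the patch's own idiom (not standalone):
 */
static void drop_l1_refs(struct domain *d, unsigned long l1mfn)
{
    l1_pgentry_t *pl1e = map_domain_page(l1mfn);
    int i;

    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        put_page_from_l1e(pl1e[i], d);

    unmap_domain_page(pl1e);   /* the previously missing release */
}
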
   135.1 --- a/xen/arch/x86/smpboot.c	Fri Jan 13 10:38:44 2006 -0600
   135.2 +++ b/xen/arch/x86/smpboot.c	Fri Jan 13 14:12:24 2006 -0600
   135.3 @@ -435,7 +435,7 @@ void __init start_secondary(void *unused
   135.4  
   135.5  	extern void percpu_traps_init(void);
   135.6  
   135.7 -	set_current(idle_task[cpu]);
   135.8 +	set_current(idle_vcpu[cpu]);
   135.9  	set_processor_id(cpu);
  135.10  
  135.11  	percpu_traps_init();
  135.12 @@ -761,7 +761,6 @@ static int __init do_boot_cpu(int apicid
  135.13   * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
  135.14   */
  135.15  {
  135.16 -	struct domain *idle;
  135.17  	struct vcpu *v;
  135.18  	unsigned long boot_error;
  135.19  	int timeout, cpu;
  135.20 @@ -770,14 +769,10 @@ static int __init do_boot_cpu(int apicid
  135.21  
  135.22  	cpu = ++cpucount;
  135.23  
  135.24 -	if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
  135.25 -		panic("failed 'createdomain' for CPU %d", cpu);
  135.26 +	v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
   135.27 +	BUG_ON(v == NULL);
  135.28  
  135.29 -	v = idle_task[cpu] = idle->vcpu[0];
  135.30 -
  135.31 -	set_bit(_DOMF_idle_domain, &idle->domain_flags);
  135.32 -
  135.33 -	v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
  135.34 +        v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
  135.35  
  135.36  	/* start_eip had better be page-aligned! */
  135.37  	start_eip = setup_trampoline();
   136.1 --- a/xen/arch/x86/time.c	Fri Jan 13 10:38:44 2006 -0600
   136.2 +++ b/xen/arch/x86/time.c	Fri Jan 13 14:12:24 2006 -0600
   136.3 @@ -17,7 +17,7 @@
   136.4  #include <xen/config.h>
   136.5  #include <xen/init.h>
   136.6  #include <xen/time.h>
   136.7 -#include <xen/ac_timer.h>
   136.8 +#include <xen/timer.h>
   136.9  #include <xen/smp.h>
  136.10  #include <xen/irq.h>
  136.11  #include <xen/softirq.h>
  136.12 @@ -56,7 +56,7 @@ struct cpu_time {
  136.13      s_time_t stime_local_stamp;
  136.14      s_time_t stime_master_stamp;
  136.15      struct time_scale tsc_scale;
  136.16 -    struct ac_timer calibration_timer;
  136.17 +    struct timer calibration_timer;
  136.18  } __cacheline_aligned;
  136.19  
  136.20  static struct cpu_time cpu_time[NR_CPUS];
  136.21 @@ -163,7 +163,7 @@ void timer_interrupt(int irq, void *dev_
  136.22  
  136.23      /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
  136.24      if ( !cpu_has_apic )
  136.25 -        raise_softirq(AC_TIMER_SOFTIRQ);
  136.26 +        raise_softirq(TIMER_SOFTIRQ);
  136.27  
  136.28      if ( using_pit )
  136.29          pit_overflow();
  136.30 @@ -342,7 +342,7 @@ static void init_pit(void)
  136.31  /* Protected by platform_timer_lock. */
  136.32  static u64 hpet_counter64, hpet_overflow_period;
  136.33  static u32 hpet_stamp;
  136.34 -static struct ac_timer hpet_overflow_timer;
  136.35 +static struct timer hpet_overflow_timer;
  136.36  
  136.37  static void hpet_overflow(void *unused)
  136.38  {
  136.39 @@ -354,7 +354,7 @@ static void hpet_overflow(void *unused)
  136.40      hpet_stamp = counter;
  136.41      spin_unlock_irq(&platform_timer_lock);
  136.42  
  136.43 -    set_ac_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period);
  136.44 +    set_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period);
  136.45  }
  136.46  
  136.47  static u64 read_hpet_count(void)
  136.48 @@ -430,7 +430,7 @@ static int init_hpet(void)
  136.49          (void)do_div(hpet_overflow_period, (u32)hpet_rate);
  136.50      }
  136.51  
  136.52 -    init_ac_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0);
  136.53 +    init_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0);
  136.54      hpet_overflow(NULL);
  136.55      platform_timer_stamp = hpet_counter64;
  136.56  
  136.57 @@ -459,7 +459,7 @@ int use_cyclone;
  136.58  /* Protected by platform_timer_lock. */
  136.59  static u64 cyclone_counter64;
  136.60  static u32 cyclone_stamp;
  136.61 -static struct ac_timer cyclone_overflow_timer;
  136.62 +static struct timer cyclone_overflow_timer;
  136.63  static volatile u32 *cyclone_timer; /* Cyclone MPMC0 register */
  136.64  
  136.65  static void cyclone_overflow(void *unused)
  136.66 @@ -472,7 +472,7 @@ static void cyclone_overflow(void *unuse
  136.67      cyclone_stamp = counter;
  136.68      spin_unlock_irq(&platform_timer_lock);
  136.69  
  136.70 -    set_ac_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000));
  136.71 +    set_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000));
  136.72  }
  136.73  
  136.74  static u64 read_cyclone_count(void)
  136.75 @@ -510,7 +510,7 @@ static int init_cyclone(void)
  136.76  
  136.77      read_platform_count = read_cyclone_count;
  136.78  
  136.79 -    init_ac_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0);
  136.80 +    init_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0);
  136.81      cyclone_overflow(NULL);
  136.82      platform_timer_stamp = cyclone_counter64;
  136.83      set_time_scale(&platform_timer_scale, CYCLONE_TIMER_FREQ);
  136.84 @@ -876,7 +876,7 @@ static void local_time_calibration(void 
  136.85      cpu_time[cpu].stime_master_stamp = curr_master_stime;
  136.86  
  136.87   out:
  136.88 -    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
  136.89 +    set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
  136.90  
  136.91      if ( cpu == 0 )
  136.92          platform_time_calibration();
  136.93 @@ -896,9 +896,9 @@ void init_percpu_time(void)
  136.94      cpu_time[cpu].stime_master_stamp = now;
  136.95      cpu_time[cpu].stime_local_stamp  = now;
  136.96  
  136.97 -    init_ac_timer(&cpu_time[cpu].calibration_timer,
  136.98 +    init_timer(&cpu_time[cpu].calibration_timer,
  136.99                    local_time_calibration, NULL, cpu);
 136.100 -    set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
 136.101 +    set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
 136.102  }
 136.103  
 136.104  /* Late init function (after all CPUs are booted). */
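
/*
 * Sketch (illustration, not part of the changeset): besides the
 * ac_timer -> timer rename, the HPET/Cyclone code above extends a
 * 32-bit platform counter to 64 bits in software: a periodic
 * "overflow" timer samples the hardware at least once per wrap, so
 * unsigned 32-bit subtraction always yields the correct delta.
 * Standalone version:
 */
#include <stdint.h>

static uint64_t counter64;    /* software-extended count */
static uint32_t last_stamp;   /* last hardware sample    */

uint64_t extend_counter(uint32_t hw_now)
{
    counter64 += (uint32_t)(hw_now - last_stamp);  /* wrap-safe delta */
    last_stamp = hw_now;
    return counter64;
}
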
   137.1 --- a/xen/arch/x86/traps.c	Fri Jan 13 10:38:44 2006 -0600
   137.2 +++ b/xen/arch/x86/traps.c	Fri Jan 13 14:12:24 2006 -0600
   137.3 @@ -130,9 +130,19 @@ unsigned long kernel_text_end(void)
   137.4  static void show_guest_stack(struct cpu_user_regs *regs)
   137.5  {
   137.6      int i;
   137.7 -    unsigned long *stack = (unsigned long *)regs->esp, addr;
   137.8 +    unsigned long *stack, addr;
   137.9  
  137.10 -    printk("Guest stack trace from "__OP"sp=%p:\n   ", stack);
  137.11 +    if ( VM86_MODE(regs) )
  137.12 +    {
  137.13 +        stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
  137.14 +        printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n   ",
  137.15 +               regs->ss, (uint16_t)(regs->esp & 0xffff));
  137.16 +    }
  137.17 +    else
  137.18 +    {
  137.19 +        stack = (unsigned long *)regs->esp;
  137.20 +        printk("Guest stack trace from "__OP"sp=%p:\n   ", stack);
  137.21 +    }
  137.22  
  137.23      for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
  137.24      {
  137.25 @@ -427,7 +437,7 @@ void propagate_page_fault(unsigned long 
  137.26          tb->flags |= TBF_INTERRUPT;
  137.27  }
  137.28  
  137.29 -static int handle_perdomain_mapping_fault(
  137.30 +static int handle_gdt_ldt_mapping_fault(
  137.31      unsigned long offset, struct cpu_user_regs *regs)
  137.32  {
  137.33      extern int map_ldt_shadow_page(unsigned int);
  137.34 @@ -437,14 +447,14 @@ static int handle_perdomain_mapping_faul
  137.35      int ret;
  137.36  
  137.37      /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
  137.38 -    unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
  137.39 -    unsigned int vcpu_area   = (offset >> PDPT_VCPU_VA_SHIFT);
  137.40 +    unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
  137.41 +    unsigned int vcpu_area   = (offset >> GDT_LDT_VCPU_VA_SHIFT);
  137.42  
  137.43      /* Should never fault in another vcpu's area. */
  137.44      BUG_ON(vcpu_area != current->vcpu_id);
  137.45  
  137.46      /* Byte offset within the gdt/ldt sub-area. */
  137.47 -    offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;
  137.48 +    offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
  137.49  
  137.50      if ( likely(is_ldt_area) )
  137.51      {
  137.52 @@ -490,9 +500,9 @@ static int fixup_page_fault(unsigned lon
  137.53      {
  137.54          if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) )
  137.55              return shadow_fault(addr, regs);
  137.56 -        if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) )
  137.57 -            return handle_perdomain_mapping_fault(
  137.58 -                addr - PERDOMAIN_VIRT_START, regs);
  137.59 +        if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
  137.60 +            return handle_gdt_ldt_mapping_fault(
  137.61 +                addr - GDT_LDT_VIRT_START, regs);
  137.62      }
  137.63      else if ( unlikely(shadow_mode_enabled(d)) )
  137.64      {
  137.65 @@ -596,7 +606,6 @@ static inline int guest_io_okay(
  137.66      u16 x;
  137.67  #if defined(__x86_64__)
  137.68      /* If in user mode, switch to kernel mode just to read I/O bitmap. */
  137.69 -    extern void toggle_guest_mode(struct vcpu *);
  137.70      int user_mode = !(v->arch.flags & TF_kernel_mode);
  137.71  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
  137.72  #elif defined(__i386__)
  137.73 @@ -964,16 +973,26 @@ static int emulate_privileged_op(struct 
  137.74      case 0x30: /* WRMSR */
  137.75          /* Ignore the instruction if unprivileged. */
  137.76          if ( !IS_PRIV(v->domain) )
  137.77 -            DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
  137.78 -                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
  137.79 +        {
  137.80 +            u32 l, h;
  137.81 +            if ( (rdmsr_user(regs->ecx, l, h) != 0) ||
  137.82 +                 (regs->ecx != MSR_EFER) ||
  137.83 +                 (regs->eax != l) || (regs->edx != h) )
  137.84 +                DPRINTK("Non-priv domain attempted WRMSR %p from "
  137.85 +                        "%08x:%08x to %08lx:%08lx.\n",
  137.86 +                        _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
  137.87 +        }
  137.88          else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
  137.89              goto fail;
  137.90          break;
  137.91  
  137.92      case 0x32: /* RDMSR */
  137.93          if ( !IS_PRIV(v->domain) )
  137.94 -            DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
  137.95 -                    _p(regs->ecx), (long)regs->eax, (long)regs->edx);
  137.96 +        {
  137.97 +            if ( regs->ecx != MSR_EFER )
  137.98 +                DPRINTK("Non-priv domain attempted RDMSR %p.\n",
  137.99 +                        _p(regs->ecx));
 137.100 +        }
 137.101          /* Everyone can read the MSR space. */
 137.102          if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
 137.103              goto fail;
 137.104 @@ -1080,26 +1099,23 @@ asmlinkage int do_general_protection(str
 137.105      return 0;
 137.106  }
 137.107  
 137.108 +static void nmi_softirq(void)
 137.109 +{
  137.110 +    /* Only used to defer wakeup of dom0's vcpu0 to a safe (non-NMI) context. */
 137.111 +    evtchn_notify(dom0->vcpu[0]);
 137.112 +}
 137.113  
 137.114 -/* Defer dom0 notification to softirq context (unsafe in NMI context). */
 137.115 -static unsigned long nmi_dom0_softirq_reason;
 137.116 -#define NMI_DOM0_PARITY_ERR 0
 137.117 -#define NMI_DOM0_IO_ERR     1
 137.118 -#define NMI_DOM0_UNKNOWN    2
 137.119 +static void nmi_dom0_report(unsigned int reason_idx)
 137.120 +{
 137.121 +    struct domain *d;
 137.122  
 137.123 -static void nmi_dom0_softirq(void)
 137.124 -{
 137.125 -    if ( dom0 == NULL )
 137.126 +    if ( (d = dom0) == NULL )
 137.127          return;
 137.128  
 137.129 -    if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
 137.130 -        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
 137.131 +    set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
 137.132  
 137.133 -    if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
 137.134 -        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
 137.135 -
 137.136 -    if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
 137.137 -        send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
 137.138 +    if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
 137.139 +        raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
 137.140  }
 137.141  
 137.142  asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
 137.143 @@ -1107,8 +1123,7 @@ asmlinkage void mem_parity_error(struct 
 137.144      switch ( opt_nmi[0] )
 137.145      {
 137.146      case 'd': /* 'dom0' */
 137.147 -        set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
 137.148 -        raise_softirq(NMI_DOM0_SOFTIRQ);
 137.149 +        nmi_dom0_report(_XEN_NMIREASON_parity_error);
 137.150      case 'i': /* 'ignore' */
 137.151          break;
 137.152      default:  /* 'fatal' */
 137.153 @@ -1127,8 +1142,7 @@ asmlinkage void io_check_error(struct cp
 137.154      switch ( opt_nmi[0] )
 137.155      {
 137.156      case 'd': /* 'dom0' */
 137.157 -        set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
 137.158 -        raise_softirq(NMI_DOM0_SOFTIRQ);
 137.159 +        nmi_dom0_report(_XEN_NMIREASON_io_error);
 137.160      case 'i': /* 'ignore' */
 137.161          break;
 137.162      default:  /* 'fatal' */
 137.163 @@ -1147,8 +1161,7 @@ static void unknown_nmi_error(unsigned c
 137.164      switch ( opt_nmi[0] )
 137.165      {
 137.166      case 'd': /* 'dom0' */
 137.167 -        set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
 137.168 -        raise_softirq(NMI_DOM0_SOFTIRQ);
 137.169 +        nmi_dom0_report(_XEN_NMIREASON_unknown);
 137.170      case 'i': /* 'ignore' */
 137.171          break;
 137.172      default:  /* 'fatal' */
 137.173 @@ -1347,7 +1360,7 @@ void __init trap_init(void)
 137.174  
 137.175      cpu_init();
 137.176  
 137.177 -    open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
 137.178 +    open_softirq(NMI_SOFTIRQ, nmi_softirq);
 137.179  }
 137.180  
 137.181  
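
/*
 * Sketch (illustration, not part of the changeset): two themes in the
 * traps.c hunks above. NMI reporting to dom0 now latches a reason bit
 * in shared_info plus a per-vcpu pending flag, deferring only the
 * wakeup to softirq context. And show_guest_stack() must translate a
 * VM86-mode ss:sp pair into a linear address before walking the stack,
 * since real/VM86 mode has no descriptor base -- just segment * 16:
 */
#include <stdint.h>

uint32_t vm86_stack_linear(uint16_t ss, uint32_t esp)
{
    return ((uint32_t)ss << 4) + (esp & 0xffff);   /* offset is 16-bit */
}
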
   138.1 --- a/xen/arch/x86/vmx.c	Fri Jan 13 10:38:44 2006 -0600
   138.2 +++ b/xen/arch/x86/vmx.c	Fri Jan 13 14:12:24 2006 -0600
   138.3 @@ -42,7 +42,7 @@
   138.4  #include <asm/shadow_64.h>
   138.5  #endif
   138.6  #include <public/sched.h>
   138.7 -#include <public/io/ioreq.h>
   138.8 +#include <public/hvm/ioreq.h>
   138.9  #include <asm/vmx_vpic.h>
  138.10  #include <asm/vmx_vlapic.h>
  138.11  
  138.12 @@ -53,7 +53,7 @@ unsigned int opt_vmx_debug_level = 0;
  138.13  integer_param("vmx_debug", opt_vmx_debug_level);
  138.14  
  138.15  static unsigned long trace_values[NR_CPUS][4];
  138.16 -#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
  138.17 +#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
  138.18  
  138.19  static int vmx_switch_on;
  138.20  
  138.21 @@ -66,11 +66,6 @@ void vmx_final_setup_guest(struct vcpu *
  138.22          struct domain *d = v->domain;
  138.23          struct vcpu *vc;
  138.24  
  138.25 -        d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
  138.26 -        v->arch.guest_context.user_regs.ecx = 0;
  138.27 -        VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
  138.28 -                    d->arch.vmx_platform.lapic_enable);
  138.29 -
  138.30          /* Initialize monitor page table */
  138.31          for_each_vcpu(d, vc)
  138.32              vc->arch.monitor_table = mk_pagetable(0);
  138.33 @@ -95,7 +90,7 @@ void vmx_final_setup_guest(struct vcpu *
  138.34  void vmx_relinquish_resources(struct vcpu *v)
  138.35  {
  138.36      struct vmx_virpit *vpit;
  138.37 -    
  138.38 +
  138.39      if ( !VMX_DOMAIN(v) )
  138.40          return;
  138.41  
  138.42 @@ -103,19 +98,18 @@ void vmx_relinquish_resources(struct vcp
  138.43          /* unmap IO shared page */
  138.44          struct domain *d = v->domain;
  138.45          if ( d->arch.vmx_platform.shared_page_va )
  138.46 -            unmap_domain_page((void *)d->arch.vmx_platform.shared_page_va);
  138.47 +            unmap_domain_page_global(
  138.48 +                (void *)d->arch.vmx_platform.shared_page_va);
  138.49      }
  138.50  
  138.51      destroy_vmcs(&v->arch.arch_vmx);
  138.52      free_monitor_pagetable(v);
  138.53      vpit = &v->domain->arch.vmx_platform.vmx_pit;
  138.54 -    if ( active_ac_timer(&(vpit->pit_timer)) )
  138.55 -        rem_ac_timer(&vpit->pit_timer);
  138.56 -    if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) )
  138.57 -        rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
  138.58 +    kill_timer(&vpit->pit_timer);
  138.59 +    kill_timer(&v->arch.arch_vmx.hlt_timer);
  138.60      if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
  138.61      {
  138.62 -        rem_ac_timer(&VLAPIC(v)->vlapic_timer);
  138.63 +        kill_timer(&VLAPIC(v)->vlapic_timer);
  138.64          xfree(VLAPIC(v));
  138.65      }
  138.66  }
  138.67 @@ -1604,7 +1598,7 @@ void vmx_vmexit_do_hlt(void)
  138.68          next_wakeup = next_pit;
  138.69      }
  138.70      if ( next_wakeup != - 1 ) 
  138.71 -        set_ac_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
  138.72 +        set_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
  138.73      do_block();
  138.74  }
  138.75  
  138.76 @@ -1955,9 +1949,12 @@ asmlinkage void load_cr2(void)
  138.77  
  138.78  asmlinkage void trace_vmentry (void)
  138.79  {
  138.80 -    TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
  138.81 -             trace_values[current->processor][1],trace_values[current->processor][2],
  138.82 -             trace_values[current->processor][3],trace_values[current->processor][4]);
  138.83 +    TRACE_5D(TRC_VMENTRY,
  138.84 +             trace_values[smp_processor_id()][0],
  138.85 +             trace_values[smp_processor_id()][1],
  138.86 +             trace_values[smp_processor_id()][2],
  138.87 +             trace_values[smp_processor_id()][3],
  138.88 +             trace_values[smp_processor_id()][4]);
  138.89      TRACE_VMEXIT(0,9);
  138.90      TRACE_VMEXIT(1,9);
  138.91      TRACE_VMEXIT(2,9);
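
/*
 * Sketch (illustration, not part of the changeset): two recurring fixes
 * in the vmx.c hunks above. Per-CPU buffers are now indexed by
 * smp_processor_id() -- the CPU actually executing -- rather than
 * current->processor, which can be stale across vcpu migration. And
 * kill_timer() is safe to call whether or not the timer is armed, so
 * the old "if ( active_ac_timer() ) rem_ac_timer()" dance disappears.
 * The indexing fix in miniature (not standalone):
 */
static unsigned long trace_values[NR_CPUS][5];
#define TRACE_VMEXIT(idx, val) \
    (trace_values[smp_processor_id()][idx] = (val))
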
   139.1 --- a/xen/arch/x86/vmx_intercept.c	Fri Jan 13 10:38:44 2006 -0600
   139.2 +++ b/xen/arch/x86/vmx_intercept.c	Fri Jan 13 14:12:24 2006 -0600
   139.3 @@ -24,7 +24,7 @@
   139.4  #include <asm/vmx_vpit.h>
   139.5  #include <asm/vmx_intercept.h>
   139.6  #include <asm/vmx_vlapic.h>
   139.7 -#include <public/io/ioreq.h>
   139.8 +#include <public/hvm/ioreq.h>
   139.9  #include <xen/lib.h>
  139.10  #include <xen/sched.h>
  139.11  #include <asm/current.h>
  139.12 @@ -356,19 +356,19 @@ static void pit_timer_fn(void *data)
  139.13      vpit->pending_intr_nr++;
  139.14      if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
  139.15          vpit->scheduled += vpit->period;
  139.16 -        set_ac_timer(&vpit->pit_timer, vpit->scheduled);
  139.17 +        set_timer(&vpit->pit_timer, vpit->scheduled);
  139.18      }
  139.19  }
  139.20  
  139.21  void pickup_deactive_ticks(struct vmx_virpit *vpit)
  139.22  {
  139.23  
  139.24 -    if ( !active_ac_timer(&(vpit->pit_timer)) ) {
  139.25 +    if ( !active_timer(&(vpit->pit_timer)) ) {
  139.26          /* pick up missed timer tick */
  139.27          missed_ticks(vpit);
  139.28      
  139.29          vpit->scheduled += vpit->period;
  139.30 -        set_ac_timer(&vpit->pit_timer, vpit->scheduled);
  139.31 +        set_timer(&vpit->pit_timer, vpit->scheduled);
  139.32      }
  139.33  }
  139.34  
  139.35 @@ -385,14 +385,14 @@ void vmx_hooks_assist(struct vcpu *v)
  139.36      /* load init count*/
  139.37      if (p->state == STATE_IORESP_HOOK) {
  139.38          /* set up actimer, handle re-init */
  139.39 -        if ( active_ac_timer(&(vpit->pit_timer)) ) {
  139.40 +        if ( active_timer(&(vpit->pit_timer)) ) {
  139.41              VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: guest reset PIT with channel %lx!\n", (unsigned long) ((p->u.data >> 24) & 0x3) );
  139.42 -            rem_ac_timer(&(vpit->pit_timer));
  139.43 +            stop_timer(&(vpit->pit_timer));
  139.44              reinit = 1;
  139.45   
  139.46          }
  139.47          else {
  139.48 -            init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
  139.49 +            init_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
  139.50          }
  139.51  
  139.52          /* init count for this channel */
  139.53 @@ -431,7 +431,7 @@ void vmx_hooks_assist(struct vcpu *v)
  139.54          }
  139.55  
  139.56          vpit->scheduled = NOW() + vpit->period;
  139.57 -        set_ac_timer(&vpit->pit_timer, vpit->scheduled);
  139.58 +        set_timer(&vpit->pit_timer, vpit->scheduled);
  139.59  
  139.60          /*restore the state*/
  139.61          p->state = STATE_IORESP_READY;
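
/*
 * Sketch (illustration, not part of the changeset): the virtual PIT
 * above re-arms with "scheduled += period" rather than "NOW() +
 * period". Advancing the deadline by a fixed step keeps the long-run
 * tick rate correct even when callbacks run late; ticks missed while
 * the vcpu was descheduled are credited on wakeup through
 * pickup_deactive_ticks(). The re-arm in the patch's idiom:
 */
static void pit_rearm(struct vmx_virpit *vpit)
{
    vpit->scheduled += vpit->period;   /* fixed-step, drift-free */
    set_timer(&vpit->pit_timer, vpit->scheduled);
}
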
   140.1 --- a/xen/arch/x86/vmx_io.c	Fri Jan 13 10:38:44 2006 -0600
   140.2 +++ b/xen/arch/x86/vmx_io.c	Fri Jan 13 14:12:24 2006 -0600
   140.3 @@ -37,7 +37,7 @@
   140.4  #include <asm/shadow.h>
   140.5  #include <asm/vmx_vpic.h>
   140.6  #include <asm/vmx_vlapic.h>
   140.7 -#include <public/io/ioreq.h>
   140.8 +#include <public/hvm/ioreq.h>
   140.9  
  140.10  #ifdef CONFIG_VMX
  140.11  #if defined (__i386__)
  140.12 @@ -819,7 +819,7 @@ interrupt_post_injection(struct vcpu * v
  140.13          if ( !vpit->first_injected ) {
  140.14              vpit->pending_intr_nr = 0;
  140.15              vpit->scheduled = NOW() + vpit->period;
  140.16 -            set_ac_timer(&vpit->pit_timer, vpit->scheduled);
  140.17 +            set_timer(&vpit->pit_timer, vpit->scheduled);
  140.18              vpit->first_injected = 1;
  140.19          } else {
  140.20              vpit->pending_intr_nr--;
   141.1 --- a/xen/arch/x86/vmx_platform.c	Fri Jan 13 10:38:44 2006 -0600
   141.2 +++ b/xen/arch/x86/vmx_platform.c	Fri Jan 13 14:12:24 2006 -0600
   141.3 @@ -27,7 +27,7 @@
   141.4  #include <xen/trace.h>
   141.5  #include <asm/vmx.h>
   141.6  #include <asm/vmx_platform.h>
   141.7 -#include <public/io/ioreq.h>
   141.8 +#include <public/hvm/ioreq.h>
   141.9  
  141.10  #include <xen/lib.h>
  141.11  #include <xen/sched.h>
   142.1 --- a/xen/arch/x86/vmx_vlapic.c	Fri Jan 13 10:38:44 2006 -0600
   142.2 +++ b/xen/arch/x86/vmx_vlapic.c	Fri Jan 13 14:12:24 2006 -0600
   142.3 @@ -32,7 +32,7 @@
   142.4  #include <xen/lib.h>
   142.5  #include <xen/sched.h>
   142.6  #include <asm/current.h>
   142.7 -#include <public/io/ioreq.h>
   142.8 +#include <public/hvm/ioreq.h>
   142.9  
  142.10  #ifdef CONFIG_VMX
  142.11  
  142.12 @@ -62,7 +62,7 @@ int vlapic_find_highest_irr(struct vlapi
  142.13  
  142.14  int vmx_apic_support(struct domain *d)
  142.15  {
  142.16 -    return d->arch.vmx_platform.lapic_enable;
  142.17 +    return d->arch.vmx_platform.apic_enabled;
  142.18  }
  142.19  
  142.20  s_time_t get_apictime_scheduled(struct vcpu *v)
  142.21 @@ -391,7 +391,7 @@ static void vlapic_begin_timer(struct vl
  142.22        (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter;
  142.23      vlapic->vlapic_timer.expires = cur + offset;
  142.24  
  142.25 -    set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
  142.26 +    set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
  142.27  
  142.28      VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: "
  142.29                  "bus_scale %x now %08x%08x expire %08x%08x "
  142.30 @@ -739,7 +739,7 @@ static void vlapic_write(struct vcpu *v,
  142.31  
  142.32      case APIC_TMICT:
  142.33          if (vlapic_timer_active(vlapic))
  142.34 -            rem_ac_timer(&(vlapic->vlapic_timer));
  142.35 +            stop_timer(&(vlapic->vlapic_timer));
  142.36  
  142.37          vlapic->timer_initial = val;
  142.38          vlapic->timer_current = val;
  142.39 @@ -846,7 +846,7 @@ void vlapic_timer_fn(void *data)
  142.40          vlapic->timer_current = vlapic->timer_initial;
  142.41          offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter;
  142.42          vlapic->vlapic_timer.expires = NOW() + offset;
  142.43 -        set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
  142.44 +        set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
  142.45      }else {
  142.46          vlapic->timer_current = 0;
  142.47      }
  142.48 @@ -986,7 +986,7 @@ static int vlapic_reset(struct vlapic *v
  142.49  
  142.50      vmx_vioapic_add_lapic(vlapic, v);
  142.51  
  142.52 -    init_ac_timer(&vlapic->vlapic_timer,
  142.53 +    init_timer(&vlapic->vlapic_timer,
  142.54                    vlapic_timer_fn, vlapic, v->processor);
  142.55  
  142.56  #ifdef VLAPIC_NO_BIOS
   143.1 --- a/xen/arch/x86/vmx_vmcs.c	Fri Jan 13 10:38:44 2006 -0600
   143.2 +++ b/xen/arch/x86/vmx_vmcs.c	Fri Jan 13 14:12:24 2006 -0600
   143.3 @@ -32,7 +32,7 @@
   143.4  #include <asm/flushtlb.h>
   143.5  #include <xen/event.h>
   143.6  #include <xen/kernel.h>
   143.7 -#include <public/io/ioreq.h>
   143.8 +#include <public/hvm/hvm_info_table.h>
   143.9  #if CONFIG_PAGING_LEVELS >= 4
  143.10  #include <asm/shadow_64.h>
  143.11  #endif
  143.12 @@ -193,7 +193,7 @@ static void vmx_map_io_shared_page(struc
  143.13          domain_crash_synchronous();
  143.14      }
  143.15  
  143.16 -    p = map_domain_page(mpfn);
  143.17 +    p = map_domain_page_global(mpfn);
  143.18      if (p == NULL) {
  143.19          printk("Can not map io request shared page for VMX domain.\n");
  143.20          domain_crash_synchronous();
  143.21 @@ -206,36 +206,56 @@ static void vmx_map_io_shared_page(struc
  143.22                &d->shared_info->evtchn_mask[0]);
  143.23  }
  143.24  
  143.25 -#define VCPU_NR_PAGE        0x0009F000
  143.26 -#define VCPU_NR_OFFSET      0x00000800
  143.27 -#define VCPU_MAGIC          0x76637075  /* "vcpu" */
  143.28 +static int validate_hvm_info(struct hvm_info_table *t)
  143.29 +{
  143.30 +    char signature[] = "HVM INFO";
  143.31 +    uint8_t *ptr = (uint8_t *)t;
  143.32 +    uint8_t sum = 0;
  143.33 +    int i;
  143.34  
  143.35 -static void vmx_set_vcpu_nr(struct domain *d)
  143.36 +    /* strncmp(t->signature, "HVM INFO", 8) */
  143.37 +    for ( i = 0; i < 8; i++ ) {
  143.38 +        if ( signature[i] != t->signature[i] ) {
  143.39 +            printk("Bad hvm info signature\n");
  143.40 +            return 0;
  143.41 +        }
  143.42 +    }
  143.43 +
  143.44 +    for ( i = 0; i < t->length; i++ )
  143.45 +        sum += ptr[i];
  143.46 +
  143.47 +    return (sum == 0);
  143.48 +}
  143.49 +
  143.50 +static void vmx_get_hvm_info(struct domain *d)
  143.51  {
  143.52      unsigned char *p;
  143.53      unsigned long mpfn;
  143.54 -    unsigned int *vcpus;
  143.55 +    struct hvm_info_table *t;
  143.56  
  143.57 -    mpfn = get_mfn_from_pfn(VCPU_NR_PAGE >> PAGE_SHIFT);
  143.58 -    if (mpfn == INVALID_MFN) {
  143.59 -        printk("Can not get vcpu number page mfn for VMX domain.\n");
  143.60 +    mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
  143.61 +    if ( mpfn == INVALID_MFN ) {
  143.62 +        printk("Can not get hvm info page mfn for VMX domain.\n");
  143.63          domain_crash_synchronous();
  143.64      }
  143.65  
  143.66      p = map_domain_page(mpfn);
  143.67 -    if (p == NULL) {
  143.68 -        printk("Can not map vcpu number page for VMX domain.\n");
  143.69 +    if ( p == NULL ) {
  143.70 +        printk("Can not map hvm info page for VMX domain.\n");
  143.71          domain_crash_synchronous();
  143.72      }
  143.73  
  143.74 -    vcpus = (unsigned int *)(p + VCPU_NR_OFFSET);
  143.75 -    if (vcpus[0] != VCPU_MAGIC) {
  143.76 -        printk("Bad vcpus magic, set vcpu number to 1 by default.\n");
  143.77 -        d->arch.vmx_platform.nr_vcpu = 1;
  143.78 +    t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
  143.79 +
  143.80 +    if ( validate_hvm_info(t) ) {
  143.81 +        d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
  143.82 +        d->arch.vmx_platform.apic_enabled = t->apic_enabled;
  143.83 +    } else {
  143.84 +        printk("Bad hvm info table\n");
  143.85 +        d->arch.vmx_platform.nr_vcpus = 1;
  143.86 +        d->arch.vmx_platform.apic_enabled = 0;
  143.87      }
  143.88  
  143.89 -    d->arch.vmx_platform.nr_vcpu = vcpus[1];
  143.90 -
  143.91      unmap_domain_page(p);
  143.92  }
  143.93  
  143.94 @@ -244,10 +264,10 @@ static void vmx_setup_platform(struct do
  143.95      struct vmx_platform *platform;
  143.96  
  143.97      vmx_map_io_shared_page(d);
  143.98 -    vmx_set_vcpu_nr(d);
  143.99 +    vmx_get_hvm_info(d);
 143.100  
 143.101      platform = &d->arch.vmx_platform;
 143.102 -    pic_init(&platform->vmx_pic,  pic_irq_request, 
 143.103 +    pic_init(&platform->vmx_pic,  pic_irq_request,
 143.104               &platform->interrupt_request);
 143.105      register_pic_io_hook();
 143.106  
 143.107 @@ -321,7 +341,7 @@ static void vmx_do_launch(struct vcpu *v
 143.108          vlapic_init(v);
 143.109  
 143.110      vmx_set_host_env(v);
 143.111 -    init_ac_timer(&v->arch.arch_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
 143.112 +    init_timer(&v->arch.arch_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
 143.113  
 143.114      error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
 143.115      error |= __vmwrite(GUEST_LDTR_BASE, 0);
 143.116 @@ -335,6 +355,8 @@ static void vmx_do_launch(struct vcpu *v
 143.117      __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 143.118  
 143.119      v->arch.schedule_tail = arch_vmx_do_resume;
 143.120 +    v->arch.arch_vmx.launch_cpu = smp_processor_id();
 143.121 +
 143.122      /* init guest tsc to start from 0 */
 143.123      rdtscll(host_tsc);
 143.124      v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
 143.125 @@ -617,11 +639,21 @@ void vm_resume_fail(unsigned long eflags
 143.126  
 143.127  void arch_vmx_do_resume(struct vcpu *v)
 143.128  {
 143.129 -    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
 143.130 -
 143.131 -    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
 143.132 -    vmx_do_resume(v);
 143.133 -    reset_stack_and_jump(vmx_asm_do_resume);
 143.134 +    if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
 143.135 +    {
 143.136 +        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
 143.137 +        vmx_do_resume(v);
 143.138 +        reset_stack_and_jump(vmx_asm_do_resume);
 143.139 +    }
 143.140 +    else
 143.141 +    {
 143.142 +        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
 143.143 +        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
 143.144 +        vmx_do_resume(v);
 143.145 +        vmx_set_host_env(v);
 143.146 +        v->arch.arch_vmx.launch_cpu = smp_processor_id();
 143.147 +        reset_stack_and_jump(vmx_asm_do_relaunch);
 143.148 +    }
 143.149  }
 143.150  
 143.151  void arch_vmx_do_launch(struct vcpu *v)
 143.152 @@ -643,18 +675,6 @@ void arch_vmx_do_launch(struct vcpu *v)
 143.153      reset_stack_and_jump(vmx_asm_do_launch);
 143.154  }
 143.155  
 143.156 -void arch_vmx_do_relaunch(struct vcpu *v)
 143.157 -{
 143.158 -    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
 143.159 -
 143.160 -    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
 143.161 -    vmx_do_resume(v);
 143.162 -    vmx_set_host_env(v);
 143.163 -    v->arch.schedule_tail = arch_vmx_do_resume;
 143.164 -
 143.165 -    reset_stack_and_jump(vmx_asm_do_relaunch);
 143.166 -}
 143.167 -
 143.168  #endif /* CONFIG_VMX */
 143.169  
 143.170  /*
   144.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Fri Jan 13 10:38:44 2006 -0600
   144.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Fri Jan 13 14:12:24 2006 -0600
   144.3 @@ -65,6 +65,10 @@ void __dummy__(void)
   144.4             arch.guest_context.kernel_ss);
   144.5      OFFSET(VCPU_kernel_sp, struct vcpu,
   144.6             arch.guest_context.kernel_sp);
   144.7 +    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
   144.8 +    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
   144.9 +    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
  144.10 +    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
  144.11      BLANK();
  144.12  
  144.13      OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
   145.1 --- a/xen/arch/x86/x86_32/domain_page.c	Fri Jan 13 10:38:44 2006 -0600
   145.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Fri Jan 13 14:12:24 2006 -0600
   145.3 @@ -1,14 +1,9 @@
   145.4  /******************************************************************************
   145.5   * domain_page.h
   145.6   * 
   145.7 - * Allow temporary mapping of domain pages. Based on ideas from the
   145.8 - * Linux PKMAP code -- the copyrights and credits are retained below.
   145.9 - */
  145.10 -
  145.11 -/*
  145.12 - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
  145.13 - *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
  145.14 - * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  145.15 + * Allow temporary mapping of domain pages.
  145.16 + * 
  145.17 + * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
  145.18   */
  145.19  
  145.20  #include <xen/config.h>
  145.21 @@ -20,80 +15,203 @@
  145.22  #include <asm/flushtlb.h>
  145.23  #include <asm/hardirq.h>
  145.24  
  145.25 -#define MAPCACHE_ORDER    10
  145.26 -#define MAPCACHE_ENTRIES  (1 << MAPCACHE_ORDER)
  145.27 -
  145.28 -l1_pgentry_t *mapcache;
  145.29 -static unsigned int map_idx, epoch, shadow_epoch[NR_CPUS];
  145.30 -static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
  145.31 -
  145.32 -/* Use a spare PTE bit to mark entries ready for recycling. */
  145.33 -#define READY_FOR_TLB_FLUSH (1<<10)
  145.34 -
  145.35 -static void flush_all_ready_maps(void)
  145.36 -{
  145.37 -    l1_pgentry_t *cache = mapcache;
  145.38 -    unsigned int i;
  145.39 -
  145.40 -    for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
  145.41 -        if ( (l1e_get_flags(cache[i]) & READY_FOR_TLB_FLUSH) )
  145.42 -            cache[i] = l1e_empty();
  145.43 -}
  145.44 -
  145.45 -void *map_domain_pages(unsigned long pfn, unsigned int order)
  145.46 +void *map_domain_page(unsigned long pfn)
  145.47  {
  145.48      unsigned long va;
  145.49 -    unsigned int idx, i, flags, cpu = smp_processor_id();
  145.50 -    l1_pgentry_t *cache = mapcache;
  145.51 -#ifndef NDEBUG
  145.52 -    unsigned int flush_count = 0;
  145.53 -#endif
  145.54 +    unsigned int idx, i, vcpu = current->vcpu_id;
  145.55 +    struct domain *d;
  145.56 +    struct mapcache *cache;
  145.57 +    struct vcpu_maphash_entry *hashent;
  145.58  
  145.59      ASSERT(!in_irq());
  145.60 +
  145.61      perfc_incrc(map_domain_page_count);
  145.62  
  145.63 -    spin_lock(&map_lock);
  145.64 +    /* If we are the idle domain, ensure that we run on our own page tables. */
  145.65 +    d = current->domain;
  145.66 +    if ( unlikely(is_idle_domain(d)) )
  145.67 +        __sync_lazy_execstate();
  145.68 +
  145.69 +    cache = &d->arch.mapcache;
  145.70  
  145.71 -    /* Has some other CPU caused a wrap? We must flush if so. */
  145.72 -    if ( epoch != shadow_epoch[cpu] )
  145.73 +    hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
  145.74 +    if ( hashent->pfn == pfn )
  145.75      {
  145.76 -        perfc_incrc(domain_page_tlb_flush);
  145.77 -        local_flush_tlb();
  145.78 -        shadow_epoch[cpu] = epoch;
  145.79 +        idx = hashent->idx;
  145.80 +        hashent->refcnt++;
  145.81 +        ASSERT(hashent->refcnt != 0);
  145.82 +        ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
  145.83 +        goto out;
  145.84      }
  145.85  
  145.86 -    do {
  145.87 -        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
  145.88 -        if ( unlikely(idx == 0) )
  145.89 +    spin_lock(&cache->lock);
  145.90 +
  145.91 +    /* Has some other CPU caused a wrap? We must flush if so. */
  145.92 +    if ( unlikely(cache->epoch != cache->shadow_epoch[vcpu]) )
  145.93 +    {
  145.94 +        cache->shadow_epoch[vcpu] = cache->epoch;
  145.95 +        if ( NEED_FLUSH(tlbflush_time[smp_processor_id()],
  145.96 +                        cache->tlbflush_timestamp) )
  145.97          {
  145.98 -            ASSERT(flush_count++ == 0);
  145.99 -            flush_all_ready_maps();
 145.100              perfc_incrc(domain_page_tlb_flush);
 145.101              local_flush_tlb();
 145.102 -            shadow_epoch[cpu] = ++epoch;
 145.103 +        }
 145.104 +    }
 145.105 +
 145.106 +    idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
 145.107 +    if ( unlikely(idx >= MAPCACHE_ENTRIES) )
 145.108 +    {
 145.109 +        /* /First/, clean the garbage map and update the inuse list. */
 145.110 +        for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
 145.111 +        {
 145.112 +            unsigned long x = xchg(&cache->garbage[i], 0);
 145.113 +            cache->inuse[i] &= ~x;
 145.114          }
 145.115  
 145.116 -        flags = 0;
 145.117 -        for ( i = 0; i < (1U << order); i++ )
 145.118 -            flags |= l1e_get_flags(cache[idx+i]);
 145.119 -    }
 145.120 -    while ( flags & _PAGE_PRESENT );
 145.121 +        /* /Second/, flush TLBs. */
 145.122 +        perfc_incrc(domain_page_tlb_flush);
 145.123 +        local_flush_tlb();
 145.124 +        cache->shadow_epoch[vcpu] = ++cache->epoch;
 145.125 +        cache->tlbflush_timestamp = tlbflush_current_time();
 145.126  
 145.127 -    for ( i = 0; i < (1U << order); i++ )
 145.128 -        cache[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
 145.129 +        idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
 145.130 +        ASSERT(idx < MAPCACHE_ENTRIES);
 145.131 +    }
 145.132  
 145.133 -    spin_unlock(&map_lock);
 145.134 +    set_bit(idx, cache->inuse);
 145.135 +    cache->cursor = idx + 1;
 145.136  
 145.137 +    spin_unlock(&cache->lock);
 145.138 +
 145.139 +    cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
 145.140 +
 145.141 + out:
 145.142      va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
 145.143      return (void *)va;
 145.144  }
 145.145  
 145.146 -void unmap_domain_pages(void *va, unsigned int order)
 145.147 +void unmap_domain_page(void *va)
 145.148  {
 145.149 -    unsigned int idx, i;
 145.150 +    unsigned int idx;
 145.151 +    struct mapcache *cache = &current->domain->arch.mapcache;
 145.152 +    unsigned long pfn;
 145.153 +    struct vcpu_maphash_entry *hashent;
 145.154 +
 145.155 +    ASSERT(!in_irq());
 145.156 +
 145.157      ASSERT((void *)MAPCACHE_VIRT_START <= va);
 145.158      ASSERT(va < (void *)MAPCACHE_VIRT_END);
 145.159 +
 145.160      idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
 145.161 -    for ( i = 0; i < (1U << order); i++ )
 145.162 -        l1e_add_flags(mapcache[idx+i], READY_FOR_TLB_FLUSH);
 145.163 +    pfn = l1e_get_pfn(cache->l1tab[idx]);
 145.164 +    hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
 145.165 +
 145.166 +    if ( hashent->idx == idx )
 145.167 +    {
 145.168 +        ASSERT(hashent->pfn == pfn);
 145.169 +        ASSERT(hashent->refcnt != 0);
 145.170 +        hashent->refcnt--;
 145.171 +    }
 145.172 +    else if ( hashent->refcnt == 0 )
 145.173 +    {
 145.174 +        if ( hashent->idx != MAPHASHENT_NOTINUSE )
 145.175 +        {
 145.176 +            /* /First/, zap the PTE. */
 145.177 +            ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
 145.178 +            cache->l1tab[hashent->idx] = l1e_empty();
 145.179 +            /* /Second/, mark as garbage. */
 145.180 +            set_bit(hashent->idx, cache->garbage);
 145.181 +        }
 145.182 +
 145.183 +        /* Add newly-freed mapping to the maphash. */
 145.184 +        hashent->pfn = pfn;
 145.185 +        hashent->idx = idx;
 145.186 +    }
 145.187 +    else
 145.188 +    {
 145.189 +        /* /First/, zap the PTE. */
 145.190 +        cache->l1tab[idx] = l1e_empty();
 145.191 +        /* /Second/, mark as garbage. */
 145.192 +        set_bit(idx, cache->garbage);
 145.193 +    }
 145.194 +}
 145.195 +
 145.196 +void mapcache_init(struct domain *d)
 145.197 +{
 145.198 +    unsigned int i, j;
 145.199 +
 145.200 +    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
 145.201 +        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
 145.202 +    spin_lock_init(&d->arch.mapcache.lock);
 145.203 +
 145.204 +    /* Mark all maphash entries as not in use. */
 145.205 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
 145.206 +        for ( j = 0; j < MAPHASH_ENTRIES; j++ )
 145.207 +            d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
 145.208 +                MAPHASHENT_NOTINUSE;
 145.209  }
 145.210 +
 145.211 +#define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
 145.212 +static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
 145.213 +static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
 145.214 +static unsigned int inuse_cursor;
 145.215 +static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED;
 145.216 +
 145.217 +void *map_domain_page_global(unsigned long pfn)
 145.218 +{
 145.219 +    l2_pgentry_t *pl2e;
 145.220 +    l1_pgentry_t *pl1e;
 145.221 +    unsigned int idx, i;
 145.222 +    unsigned long va;
 145.223 +
 145.224 +    ASSERT(!in_irq() && local_irq_is_enabled());
 145.225 +
 145.226 +    spin_lock(&globalmap_lock);
 145.227 +
 145.228 +    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
 145.229 +    va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
 145.230 +    if ( unlikely(va >= FIXADDR_START) )
 145.231 +    {
 145.232 +        /* /First/, clean the garbage map and update the inuse list. */
 145.233 +        for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
 145.234 +        {
 145.235 +            unsigned long x = xchg(&garbage[i], 0);
 145.236 +            inuse[i] &= ~x;
 145.237 +        }
 145.238 +
 145.239 +        /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
 145.240 +        flush_tlb_all();
 145.241 +
 145.242 +        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
 145.243 +        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
 145.244 +        ASSERT(va < FIXADDR_START);
 145.245 +    }
 145.246 +
 145.247 +    set_bit(idx, inuse);
 145.248 +    inuse_cursor = idx + 1;
 145.249 +
 145.250 +    spin_unlock(&globalmap_lock);
 145.251 +
 145.252 +    pl2e = virt_to_xen_l2e(va);
 145.253 +    pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(va);
 145.254 +    *pl1e = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
 145.255 +
 145.256 +    return (void *)va;
 145.257 +}
 145.258 +
 145.259 +void unmap_domain_page_global(void *va)
 145.260 +{
 145.261 +    unsigned long __va = (unsigned long)va;
 145.262 +    l2_pgentry_t *pl2e;
 145.263 +    l1_pgentry_t *pl1e;
 145.264 +    unsigned int idx;
 145.265 +
 145.266 +    /* /First/, we zap the PTE. */
 145.267 +    pl2e = virt_to_xen_l2e(__va);
 145.268 +    pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(__va);
 145.269 +    *pl1e = l1e_empty();
 145.270 +
 145.271 +    /* /Second/, we add to the garbage map. */
 145.272 +    idx = (__va - IOREMAP_VIRT_START) >> PAGE_SHIFT;
 145.273 +    set_bit(idx, garbage);
 145.274 +}
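
Taken together, the rewritten mapcache offers two mapping flavours: transient per-domain mappings (map_domain_page/unmap_domain_page, accelerated by the per-vCPU maphash) and long-lived global mappings (map_domain_page_global/unmap_domain_page_global, carved out of the ioremap range). A minimal usage sketch of the transient flavour:

    /* Sketch: transient mappings are valid only on the current vCPU and
     * should be dropped promptly; entries are either cached in the
     * per-vCPU maphash or garbage-collected at the next epoch flush. */
    void copy_domain_page(unsigned long dst_pfn, unsigned long src_pfn)
    {
        void *src = map_domain_page(src_pfn);
        void *dst = map_domain_page(dst_pfn);

        memcpy(dst, src, PAGE_SIZE);

        unmap_domain_page(dst);
        unmap_domain_page(src);
    }
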
   146.1 --- a/xen/arch/x86/x86_32/entry.S	Fri Jan 13 10:38:44 2006 -0600
   146.2 +++ b/xen/arch/x86/x86_32/entry.S	Fri Jan 13 14:12:24 2006 -0600
   146.3 @@ -326,7 +326,9 @@ test_all_events:
   146.4          shl  $IRQSTAT_shift,%eax
   146.5          test %ecx,irq_stat(%eax,1)
   146.6          jnz  process_softirqs
   146.7 -/*test_guest_events:*/
   146.8 +        btr  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
   146.9 +        jc   process_nmi
  146.10 +test_guest_events:
  146.11          movl VCPU_vcpu_info(%ebx),%eax
  146.12          testb $0xFF,VCPUINFO_upcall_mask(%eax)
  146.13          jnz  restore_all_guest
  146.14 @@ -348,7 +350,24 @@ process_softirqs:
  146.15          sti       
  146.16          call do_softirq
  146.17          jmp  test_all_events
  146.18 -                
   146.19 +
   146.20 +        ALIGN
  146.21 +process_nmi:
  146.22 +        movl VCPU_nmi_addr(%ebx),%eax
  146.23 +        test %eax,%eax
  146.24 +        jz   test_all_events
  146.25 +        bts  $_VCPUF_nmi_masked,VCPU_flags(%ebx)
  146.26 +        jc   1f
  146.27 +        sti
  146.28 +        leal VCPU_trap_bounce(%ebx),%edx
  146.29 +        movl %eax,TRAPBOUNCE_eip(%edx)
  146.30 +        movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
  146.31 +        movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
  146.32 +        call create_bounce_frame
  146.33 +        jmp  test_all_events
  146.34 +1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
  146.35 +        jmp  test_guest_events
  146.36 +
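
Rendered as C for readability, the delivery logic added above behaves roughly as follows (a sketch; deliver_via_bounce_frame() is a hypothetical stand-in for the TRAPBOUNCE setup plus create_bounce_frame, not a real function in this diff):

    /* Sketch: at most one NMI in flight per vCPU. The caller has already
     * cleared _VCPUF_nmi_pending with btr before branching here. */
    void process_nmi_sketch(struct vcpu *v)
    {
        if ( v->nmi_addr == 0 )
            return;                     /* no callback registered: drop */

        if ( test_and_set_bit(_VCPUF_nmi_masked, &v->vcpu_flags) )
        {
            /* Handler still running: re-post and deliver later. */
            set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
            return;
        }

        /* Bounce to the guest handler: eip = nmi_addr, cs = kernel CS. */
        deliver_via_bounce_frame(v, FLAT_KERNEL_CS, v->nmi_addr,
                                 TBF_INTERRUPT);
    }
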
  146.37  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
  146.38  /*   {EIP, CS, EFLAGS, [ESP, SS]}                                        */
  146.39  /* %edx == trap_bounce, %ebx == struct vcpu                       */
  146.40 @@ -620,9 +639,7 @@ ENTRY(nmi)
  146.41          jne   defer_nmi
  146.42  
  146.43  continue_nmi:
  146.44 -        movl  $(__HYPERVISOR_DS),%edx
  146.45 -        movl  %edx,%ds
  146.46 -        movl  %edx,%es
  146.47 +        SET_XEN_SEGMENTS(d)
  146.48          movl  %esp,%edx
  146.49          pushl %edx
  146.50          call  do_nmi
  146.51 @@ -660,42 +677,6 @@ do_arch_sched_op:
  146.52          movl %eax,UREGS_eax(%ecx)
  146.53          jmp  do_sched_op
  146.54  
  146.55 -do_switch_vm86:
  146.56 -        # Reset the stack pointer
  146.57 -        GET_GUEST_REGS(%ecx)
  146.58 -        movl %ecx,%esp
  146.59 -
  146.60 -        # GS:ESI == Ring-1 stack activation
  146.61 -        movl UREGS_esp(%esp),%esi
  146.62 -VFLT1:  mov  UREGS_ss(%esp),%gs
  146.63 -
  146.64 -        # ES:EDI == Ring-0 stack activation
  146.65 -        leal UREGS_eip(%esp),%edi
  146.66 -
  146.67 -        # Restore the hypercall-number-clobbered EAX on our stack frame
  146.68 -VFLT2:  movl %gs:(%esi),%eax
  146.69 -        movl %eax,UREGS_eax(%esp)
  146.70 -        addl $4,%esi
  146.71 -        	
  146.72 -      	# Copy the VM86 activation from the ring-1 stack to the ring-0 stack
  146.73 -        movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
  146.74 -VFLT3:  movl %gs:(%esi),%eax
  146.75 -        stosl
  146.76 -        addl $4,%esi
  146.77 -        loop VFLT3
  146.78 -
  146.79 -        # Fix up EFLAGS: IOPL=0, IF=1, VM=1
  146.80 -        andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
  146.81 -        orl  $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
  146.82 -        
  146.83 -        jmp test_all_events
  146.84 -
  146.85 -.section __ex_table,"a"
  146.86 -        .long VFLT1,domain_crash_synchronous
  146.87 -        .long VFLT2,domain_crash_synchronous
  146.88 -        .long VFLT3,domain_crash_synchronous
  146.89 -.previous
  146.90 -
  146.91  .data
  146.92  
  146.93  ENTRY(exception_table)
  146.94 @@ -744,11 +725,12 @@ ENTRY(hypercall_table)
  146.95          .long do_grant_table_op     /* 20 */
  146.96          .long do_vm_assist
  146.97          .long do_update_va_mapping_otherdomain
  146.98 -        .long do_switch_vm86
  146.99 +        .long do_iret
 146.100          .long do_vcpu_op
 146.101          .long do_ni_hypercall       /* 25 */
 146.102          .long do_mmuext_op
 146.103 -        .long do_acm_op             /* 27 */
 146.104 +        .long do_acm_op
 146.105 +        .long do_nmi_op
 146.106          .rept NR_hypercalls-((.-hypercall_table)/4)
 146.107          .long do_ni_hypercall
 146.108          .endr
 146.109 @@ -777,11 +759,12 @@ ENTRY(hypercall_args_table)
 146.110          .byte 3 /* do_grant_table_op    */  /* 20 */
 146.111          .byte 2 /* do_vm_assist         */
 146.112          .byte 5 /* do_update_va_mapping_otherdomain */
 146.113 -        .byte 0 /* do_switch_vm86       */
 146.114 +        .byte 0 /* do_iret              */
 146.115          .byte 3 /* do_vcpu_op           */
 146.116          .byte 0 /* do_ni_hypercall      */  /* 25 */
 146.117          .byte 4 /* do_mmuext_op         */
 146.118          .byte 1 /* do_acm_op            */
 146.119 +        .byte 2 /* do_nmi_op            */
 146.120          .rept NR_hypercalls-(.-hypercall_args_table)
 146.121          .byte 0 /* do_ni_hypercall      */
 146.122          .endr
   147.1 --- a/xen/arch/x86/x86_32/mm.c	Fri Jan 13 10:38:44 2006 -0600
   147.2 +++ b/xen/arch/x86/x86_32/mm.c	Fri Jan 13 14:12:24 2006 -0600
   147.3 @@ -29,8 +29,6 @@
   147.4  #include <asm/fixmap.h>
   147.5  #include <public/memory.h>
   147.6  
   147.7 -extern l1_pgentry_t *mapcache;
   147.8 -
   147.9  unsigned int PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
  147.10  unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
  147.11  
  147.12 @@ -68,7 +66,7 @@ void __init paging_init(void)
  147.13      void *ioremap_pt;
  147.14      unsigned long v;
  147.15      struct pfn_info *pg;
  147.16 -    int i, mapcache_order;
  147.17 +    int i;
  147.18  
  147.19  #ifdef CONFIG_X86_PAE
  147.20      printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
  147.21 @@ -76,7 +74,7 @@ void __init paging_init(void)
  147.22      printk("PAE disabled.\n");
  147.23  #endif
  147.24  
  147.25 -    idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
  147.26 +    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
  147.27  
  147.28      if ( cpu_has_pge )
  147.29      {
  147.30 @@ -121,14 +119,12 @@ void __init paging_init(void)
  147.31              l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
  147.32      }
  147.33  
  147.34 -    /* Set up mapping cache for domain pages. */
  147.35 -    mapcache_order = get_order_from_bytes(
  147.36 -        MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
  147.37 -    mapcache = alloc_xenheap_pages(mapcache_order);
  147.38 -    memset(mapcache, 0, PAGE_SIZE << mapcache_order);
  147.39 -    for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
  147.40 -        idle_pg_table_l2[l2_linear_offset(MAPCACHE_VIRT_START) + i] =
  147.41 -            l2e_from_page(virt_to_page(mapcache) + i, __PAGE_HYPERVISOR);
  147.42 +    /* Install per-domain mappings for idle domain. */
  147.43 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  147.44 +        idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
  147.45 +            l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
  147.46 +                                       arch.mm_perdomain_pt) + i,
  147.47 +                          __PAGE_HYPERVISOR);
  147.48  }
  147.49  
  147.50  void __init zap_low_mappings(l2_pgentry_t *base)
   148.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Jan 13 10:38:44 2006 -0600
   148.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Jan 13 14:12:24 2006 -0600
   148.3 @@ -157,6 +157,64 @@ asmlinkage void do_double_fault(void)
   148.4          __asm__ __volatile__ ( "hlt" );
   148.5  }
   148.6  
   148.7 +asmlinkage unsigned long do_iret(void)
   148.8 +{
   148.9 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
  148.10 +    u32 eflags;
  148.11 +
  148.12 +    /* Check worst-case stack frame for overlap with Xen protected area. */
  148.13 +    if ( unlikely(!access_ok(regs->esp, 40)) )
  148.14 +        domain_crash_synchronous();
  148.15 +
  148.16 +    /* Pop and restore EAX (clobbered by hypercall). */
  148.17 +    if ( unlikely(__copy_from_user(&regs->eax, (void __user *)regs->esp, 4)) )
  148.18 +        domain_crash_synchronous();
  148.19 +    regs->esp += 4;
  148.20 +
  148.21 +    /* Pop and restore CS and EIP. */
  148.22 +    if ( unlikely(__copy_from_user(&regs->eip, (void __user *)regs->esp, 8)) )
  148.23 +        domain_crash_synchronous();
  148.24 +    regs->esp += 8;
  148.25 +
  148.26 +    /*
  148.27 +     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
  148.28 +     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
  148.29 +     */
  148.30 +    if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) )
  148.31 +        domain_crash_synchronous();
  148.32 +    regs->esp += 4;
  148.33 +    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
  148.34 +
  148.35 +    if ( VM86_MODE(regs) )
  148.36 +    {
  148.37 +        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
  148.38 +        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) )
  148.39 +            domain_crash_synchronous();
  148.40 +    }
  148.41 +    else if ( unlikely(RING_0(regs)) )
  148.42 +    {
  148.43 +        domain_crash_synchronous();
  148.44 +    }
  148.45 +    else if ( !RING_1(regs) )
  148.46 +    {
  148.47 +        /* Return to ring 2/3: pop and restore ESP and SS. */
  148.48 +        if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) )
  148.49 +            domain_crash_synchronous();
  148.50 +    }
  148.51 +
  148.52 +    /* No longer in NMI context. */
  148.53 +    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
  148.54 +
  148.55 +    /* Restore upcall mask from saved value. */
  148.56 +    current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
  148.57 +
  148.58 +    /*
  148.59 +     * The hypercall exit path will overwrite EAX with this return
  148.60 +     * value.
  148.61 +     */
  148.62 +    return regs->eax;
  148.63 +}
  148.64 +
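
The offsets popped above (4 bytes of EAX, 8 of EIP:CS, 4 of EFLAGS, then up to 24 more) add up to the 40-byte worst case checked by access_ok(). Laid out as a struct, the guest-kernel stack frame do_iret() consumes looks roughly like this (a reconstruction from the __copy_from_user() calls above, not a public header):

    struct iret_frame_32 {              /* lowest address first */
        uint32_t eax;                   /* clobbered by the hypercall, restored first */
        uint32_t eip;
        uint32_t cs;
        uint32_t eflags;                /* IOPL cleared, IF forced on */
        /* present only when returning to ring 2/3: */
        uint32_t esp;
        uint32_t ss;
        /* additionally, only when returning to VM86 mode: */
        uint32_t es, ds, fs, gs;
    };
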
  148.65  BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
  148.66  asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
  148.67  {
   149.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Fri Jan 13 10:38:44 2006 -0600
   149.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Fri Jan 13 14:12:24 2006 -0600
   149.3 @@ -65,6 +65,10 @@ void __dummy__(void)
   149.4             arch.guest_context.syscall_callback_eip);
   149.5      OFFSET(VCPU_kernel_sp, struct vcpu,
   149.6             arch.guest_context.kernel_sp);
   149.7 +    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
   149.8 +    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
   149.9 +    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
  149.10 +    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
  149.11      BLANK();
  149.12  
  149.13      OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
   150.1 --- a/xen/arch/x86/x86_64/entry.S	Fri Jan 13 10:38:44 2006 -0600
   150.2 +++ b/xen/arch/x86/x86_64/entry.S	Fri Jan 13 14:12:24 2006 -0600
   150.3 @@ -171,7 +171,9 @@ test_all_events:
   150.4          leaq  irq_stat(%rip),%rcx
   150.5          testl $~0,(%rcx,%rax,1)
   150.6          jnz   process_softirqs
   150.7 -/*test_guest_events:*/
   150.8 +        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
   150.9 +        jc    process_nmi
  150.10 +test_guest_events:
  150.11          movq  VCPU_vcpu_info(%rbx),%rax
  150.12          testb $0xFF,VCPUINFO_upcall_mask(%rax)
  150.13          jnz   restore_all_guest
  150.14 @@ -322,6 +324,23 @@ process_softirqs:
  150.15          call do_softirq
  150.16          jmp  test_all_events
  150.17  
   150.18 +        ALIGN
  150.19 +/* %rbx: struct vcpu */
  150.20 +process_nmi:
  150.21 +        movq VCPU_nmi_addr(%rbx),%rax
  150.22 +        test %rax,%rax
  150.23 +        jz   test_all_events
  150.24 +        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
  150.25 +        jc   1f
  150.26 +        sti
  150.27 +        leaq VCPU_trap_bounce(%rbx),%rdx
  150.28 +        movq %rax,TRAPBOUNCE_eip(%rdx)
  150.29 +        movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
  150.30 +        call create_bounce_frame
  150.31 +        jmp  test_all_events
  150.32 +1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
  150.33 +        jmp  test_guest_events
   150.34 +
  150.35  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
  150.36  /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
  150.37  /* %rdx: trap_bounce, %rbx: struct vcpu                           */
  150.38 @@ -339,6 +358,9 @@ create_bounce_frame:
  150.39  1:      /* In kernel context already: push new frame at existing %rsp. */
  150.40          movq  UREGS_rsp+8(%rsp),%rsi
  150.41          andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
   150.42 +        testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
   150.43 +        jz    2f
   150.44 +        orb   $0x01,UREGS_cs+8(%rsp)
  150.45  2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
  150.46          movq  $HYPERVISOR_VIRT_START,%rax
  150.47          cmpq  %rax,%rsi
  150.48 @@ -569,7 +591,7 @@ ENTRY(nmi)
  150.49          SAVE_ALL
  150.50          movq  %rsp,%rdi
  150.51          call  do_nmi
  150.52 -	jmp   restore_all_xen
  150.53 +        jmp   ret_from_intr
  150.54  
  150.55  do_arch_sched_op:
  150.56          # Ensure we return success even if we return via schedule_tail()
  150.57 @@ -626,11 +648,12 @@ ENTRY(hypercall_table)
  150.58          .quad do_grant_table_op     /* 20 */
  150.59          .quad do_vm_assist
  150.60          .quad do_update_va_mapping_otherdomain
  150.61 -        .quad do_switch_to_user
  150.62 +        .quad do_iret
  150.63          .quad do_vcpu_op
  150.64          .quad do_set_segment_base   /* 25 */
  150.65          .quad do_mmuext_op
  150.66          .quad do_acm_op
  150.67 +        .quad do_nmi_op
   150.68          .rept NR_hypercalls-((.-hypercall_table)/8)
  150.69          .quad do_ni_hypercall
  150.70          .endr
  150.71 @@ -659,11 +682,12 @@ ENTRY(hypercall_args_table)
  150.72          .byte 3 /* do_grant_table_op    */  /* 20 */
  150.73          .byte 2 /* do_vm_assist         */
  150.74          .byte 4 /* do_update_va_mapping_otherdomain */
  150.75 -        .byte 0 /* do_switch_to_user    */
  150.76 +        .byte 0 /* do_iret              */
  150.77          .byte 3 /* do_vcpu_op           */
  150.78          .byte 2 /* do_set_segment_base  */  /* 25 */
  150.79          .byte 4 /* do_mmuext_op         */
  150.80          .byte 1 /* do_acm_op            */
  150.81 +        .byte 2 /* do_nmi_op            */
  150.82          .rept NR_hypercalls-(.-hypercall_args_table)
  150.83          .byte 0 /* do_ni_hypercall      */
  150.84          .endr
   151.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Jan 13 10:38:44 2006 -0600
   151.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Jan 13 14:12:24 2006 -0600
   151.3 @@ -80,7 +80,7 @@ void __init paging_init(void)
   151.4      l2_pgentry_t *l2_ro_mpt;
   151.5      struct pfn_info *pg;
   151.6  
   151.7 -    idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
   151.8 +    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
   151.9  
  151.10      /* Create user-accessible L2 directory to map the MPT for guests. */
  151.11      l3_ro_mpt = alloc_xenheap_page();
  151.12 @@ -119,6 +119,12 @@ void __init paging_init(void)
  151.13      /* Set up linear page table mapping. */
  151.14      idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
  151.15          l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
  151.16 +
  151.17 +    /* Install per-domain mappings for idle domain. */
  151.18 +    idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
  151.19 +        l4e_from_page(
  151.20 +            virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
  151.21 +            __PAGE_HYPERVISOR);
  151.22  }
  151.23  
  151.24  void __init zap_low_mappings(void)
   152.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Jan 13 10:38:44 2006 -0600
   152.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Jan 13 14:12:24 2006 -0600
   152.3 @@ -12,6 +12,7 @@
   152.4  #include <asm/current.h>
   152.5  #include <asm/flushtlb.h>
   152.6  #include <asm/msr.h>
   152.7 +#include <asm/shadow.h>
   152.8  #include <asm/vmx.h>
   152.9  
  152.10  void show_registers(struct cpu_user_regs *regs)
  152.11 @@ -113,6 +114,52 @@ asmlinkage void do_double_fault(struct c
  152.12          __asm__ __volatile__ ( "hlt" );
  152.13  }
  152.14  
  152.15 +void toggle_guest_mode(struct vcpu *v)
  152.16 +{
  152.17 +    v->arch.flags ^= TF_kernel_mode;
  152.18 +    __asm__ __volatile__ ( "swapgs" );
  152.19 +    update_pagetables(v);
  152.20 +    write_ptbase(v);
  152.21 +}
  152.22 +
  152.23 +long do_iret(void)
  152.24 +{
  152.25 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
  152.26 +    struct iret_context iret_saved;
  152.27 +    struct vcpu *v = current;
  152.28 +
  152.29 +    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
  152.30 +                                 sizeof(iret_saved))) )
  152.31 +        domain_crash_synchronous();
  152.32 +
  152.33 +    /* Returning to user mode? */
  152.34 +    if ( (iret_saved.cs & 3) == 3 )
  152.35 +    {
  152.36 +        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
  152.37 +            return -EFAULT;
  152.38 +        toggle_guest_mode(v);
  152.39 +    }
  152.40 +
  152.41 +    regs->rip    = iret_saved.rip;
  152.42 +    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
  152.43 +    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
  152.44 +    regs->rsp    = iret_saved.rsp;
  152.45 +    regs->ss     = iret_saved.ss | 3; /* force guest privilege */
  152.46 +
  152.47 +    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
  152.48 +    {
  152.49 +        regs->entry_vector = 0;
  152.50 +        regs->r11 = iret_saved.r11;
  152.51 +        regs->rcx = iret_saved.rcx;
  152.52 +    }
  152.53 +
  152.54 +    /* No longer in NMI context. */
  152.55 +    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
  152.56 +
  152.57 +    /* Saved %rax gets written back to regs->rax in entry.S. */
  152.58 +    return iret_saved.rax;
  152.59 +}
  152.60 +
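
The 64-bit counterpart consumes a struct iret_context from the guest stack. The fields referenced above are rax, r11, rcx, flags, rip, cs, rflags, rsp and ss; a plausible layout (the field order is an assumption, since the defining public header is not part of this hunk):

    struct iret_context {
        uint64_t rax;       /* written back to regs->rax on hypercall exit */
        uint64_t r11, rcx;  /* skipped when VGCF_IN_SYSCALL is set */
        uint64_t flags;     /* e.g. VGCF_IN_SYSCALL */
        uint64_t rip, cs, rflags, rsp, ss;
    };
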
  152.61  asmlinkage void syscall_enter(void);
  152.62  void __init percpu_traps_init(void)
  152.63  {
   153.1 --- a/xen/common/ac_timer.c	Fri Jan 13 10:38:44 2006 -0600
   153.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
   153.3 @@ -1,286 +0,0 @@
   153.4 -/******************************************************************************
   153.5 - * ac_timer.c
   153.6 - * 
   153.7 - * Copyright (c) 2002-2003 Rolf Neugebauer
   153.8 - * Copyright (c) 2002-2005 K A Fraser
   153.9 - */
  153.10 -
  153.11 -#include <xen/config.h>
  153.12 -#include <xen/init.h>
  153.13 -#include <xen/types.h>
  153.14 -#include <xen/errno.h>
  153.15 -#include <xen/sched.h>
  153.16 -#include <xen/lib.h>
  153.17 -#include <xen/smp.h>
  153.18 -#include <xen/perfc.h>
  153.19 -#include <xen/time.h>
  153.20 -#include <xen/softirq.h>
  153.21 -#include <xen/ac_timer.h>
  153.22 -#include <xen/keyhandler.h>
  153.23 -#include <asm/system.h>
  153.24 -#include <asm/desc.h>
  153.25 -
  153.26 -/*
  153.27 - * We pull handlers off the timer list this far in future,
  153.28 - * rather than reprogramming the time hardware.
  153.29 - */
  153.30 -#define TIMER_SLOP (50*1000) /* ns */
  153.31 -
  153.32 -struct ac_timers {
  153.33 -    spinlock_t        lock;
  153.34 -    struct ac_timer **heap;
  153.35 -    unsigned int      softirqs;
  153.36 -} __cacheline_aligned;
  153.37 -
  153.38 -struct ac_timers ac_timers[NR_CPUS];
  153.39 -
  153.40 -extern int reprogram_ac_timer(s_time_t timeout);
  153.41 -
  153.42 -/****************************************************************************
  153.43 - * HEAP OPERATIONS.
  153.44 - */
  153.45 -
  153.46 -#define GET_HEAP_SIZE(_h)     ((int)(((u16 *)(_h))[0]))
  153.47 -#define SET_HEAP_SIZE(_h,_v)  (((u16 *)(_h))[0] = (u16)(_v))
  153.48 -
  153.49 -#define GET_HEAP_LIMIT(_h)    ((int)(((u16 *)(_h))[1]))
  153.50 -#define SET_HEAP_LIMIT(_h,_v) (((u16 *)(_h))[1] = (u16)(_v))
  153.51 -
  153.52 -/* Sink down element @pos of @heap. */
  153.53 -static void down_heap(struct ac_timer **heap, int pos)
  153.54 -{
  153.55 -    int sz = GET_HEAP_SIZE(heap), nxt;
  153.56 -    struct ac_timer *t = heap[pos];
  153.57 -
  153.58 -    while ( (nxt = (pos << 1)) <= sz )
  153.59 -    {
  153.60 -        if ( ((nxt+1) <= sz) && (heap[nxt+1]->expires < heap[nxt]->expires) )
  153.61 -            nxt++;
  153.62 -        if ( heap[nxt]->expires > t->expires )
  153.63 -            break;
  153.64 -        heap[pos] = heap[nxt];
  153.65 -        heap[pos]->heap_offset = pos;
  153.66 -        pos = nxt;
  153.67 -    }
  153.68 -
  153.69 -    heap[pos] = t;
  153.70 -    t->heap_offset = pos;
  153.71 -}
  153.72 -
  153.73 -/* Float element @pos up @heap. */
  153.74 -static void up_heap(struct ac_timer **heap, int pos)
  153.75 -{
  153.76 -    struct ac_timer *t = heap[pos];
  153.77 -
  153.78 -    while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
  153.79 -    {
  153.80 -        heap[pos] = heap[pos>>1];
  153.81 -        heap[pos]->heap_offset = pos;
  153.82 -        pos >>= 1;
  153.83 -    }
  153.84 -
  153.85 -    heap[pos] = t;
  153.86 -    t->heap_offset = pos;
  153.87 -}
  153.88 -
  153.89 -
  153.90 -/* Delete @t from @heap. Return TRUE if new top of heap. */
  153.91 -static int remove_entry(struct ac_timer **heap, struct ac_timer *t)
  153.92 -{
  153.93 -    int sz = GET_HEAP_SIZE(heap);
  153.94 -    int pos = t->heap_offset;
  153.95 -
  153.96 -    t->heap_offset = 0;
  153.97 -
  153.98 -    if ( unlikely(pos == sz) )
  153.99 -    {
 153.100 -        SET_HEAP_SIZE(heap, sz-1);
 153.101 -        goto out;
 153.102 -    }
 153.103 -
 153.104 -    heap[pos] = heap[sz];
 153.105 -    heap[pos]->heap_offset = pos;
 153.106 -
 153.107 -    SET_HEAP_SIZE(heap, --sz);
 153.108 -
 153.109 -    if ( (pos > 1) && (heap[pos]->expires < heap[pos>>1]->expires) )
 153.110 -        up_heap(heap, pos);
 153.111 -    else
 153.112 -        down_heap(heap, pos);
 153.113 -
 153.114 - out:
 153.115 -    return (pos == 1);
 153.116 -}
 153.117 -
 153.118 -
 153.119 -/* Add new entry @t to @heap. Return TRUE if new top of heap. */
 153.120 -static int add_entry(struct ac_timer ***pheap, struct ac_timer *t)
 153.121 -{
 153.122 -    struct ac_timer **heap = *pheap;
 153.123 -    int sz = GET_HEAP_SIZE(heap);
 153.124 -
 153.125 -    /* Copy the heap if it is full. */
 153.126 -    if ( unlikely(sz == GET_HEAP_LIMIT(heap)) )
 153.127 -    {
 153.128 -        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
 153.129 -        int old_limit = GET_HEAP_LIMIT(heap);
 153.130 -        int new_limit = ((old_limit + 1) << 4) - 1;
 153.131 -        heap = xmalloc_array(struct ac_timer *, new_limit + 1);
 153.132 -        BUG_ON(heap == NULL);
 153.133 -        memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap));
 153.134 -        SET_HEAP_LIMIT(heap, new_limit);
 153.135 -        if ( old_limit != 0 )
 153.136 -            xfree(*pheap);
 153.137 -        *pheap = heap;
 153.138 -    }
 153.139 -
 153.140 -    SET_HEAP_SIZE(heap, ++sz);
 153.141 -    heap[sz] = t;
 153.142 -    t->heap_offset = sz;
 153.143 -    up_heap(heap, sz);
 153.144 -    return (t->heap_offset == 1);
 153.145 -}
 153.146 -
 153.147 -
 153.148 -/****************************************************************************
 153.149 - * TIMER OPERATIONS.
 153.150 - */
 153.151 -
 153.152 -static inline void __add_ac_timer(struct ac_timer *timer)
 153.153 -{
 153.154 -    int cpu = timer->cpu;
 153.155 -    if ( add_entry(&ac_timers[cpu].heap, timer) )
 153.156 -        cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
 153.157 -}
 153.158 -
 153.159 -
 153.160 -static inline void __rem_ac_timer(struct ac_timer *timer)
 153.161 -{
 153.162 -    int cpu = timer->cpu;
 153.163 -    if ( remove_entry(ac_timers[cpu].heap, timer) )
 153.164 -        cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
 153.165 -}
 153.166 -
 153.167 -
 153.168 -void set_ac_timer(struct ac_timer *timer, s_time_t expires)
 153.169 -{
 153.170 -    int           cpu = timer->cpu;
 153.171 -    unsigned long flags;
 153.172 -
 153.173 -    spin_lock_irqsave(&ac_timers[cpu].lock, flags);
 153.174 -    ASSERT(timer != NULL);
 153.175 -    if ( active_ac_timer(timer) )
 153.176 -        __rem_ac_timer(timer);
 153.177 -    timer->expires = expires;
 153.178 -    __add_ac_timer(timer);
 153.179 -    spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
 153.180 -}
 153.181 -
 153.182 -
 153.183 -void rem_ac_timer(struct ac_timer *timer)
 153.184 -{
 153.185 -    int           cpu = timer->cpu;
 153.186 -    unsigned long flags;
 153.187 -
 153.188 -    spin_lock_irqsave(&ac_timers[cpu].lock, flags);
 153.189 -    ASSERT(timer != NULL);
 153.190 -    if ( active_ac_timer(timer) )
 153.191 -        __rem_ac_timer(timer);
 153.192 -    spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
 153.193 -}
 153.194 -
 153.195 -
 153.196 -static void ac_timer_softirq_action(void)
 153.197 -{
 153.198 -    int              cpu = smp_processor_id();
 153.199 -    struct ac_timer *t, **heap;
 153.200 -    s_time_t         now;
 153.201 -    void             (*fn)(void *);
 153.202 -
 153.203 -    spin_lock_irq(&ac_timers[cpu].lock);
 153.204 -    
 153.205 -    do {
 153.206 -        heap = ac_timers[cpu].heap;
 153.207 -        now  = NOW();
 153.208 -
 153.209 -        while ( (GET_HEAP_SIZE(heap) != 0) &&
 153.210 -                ((t = heap[1])->expires < (now + TIMER_SLOP)) )
 153.211 -        {
 153.212 -            remove_entry(heap, t);
 153.213 -
 153.214 -            if ( (fn = t->function) != NULL )
 153.215 -            {
 153.216 -                void *data = t->data;
 153.217 -                spin_unlock_irq(&ac_timers[cpu].lock);
 153.218 -                (*fn)(data);
 153.219 -                spin_lock_irq(&ac_timers[cpu].lock);
 153.220 -            }
 153.221 -
 153.222 -            /* Heap may have grown while the lock was released. */
 153.223 -            heap = ac_timers[cpu].heap;
 153.224 -        }
 153.225 -    }
 153.226 -    while ( !reprogram_ac_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
 153.227 -
 153.228 -    spin_unlock_irq(&ac_timers[cpu].lock);
 153.229 -}
 153.230 -
 153.231 -
 153.232 -static void dump_timerq(unsigned char key)
 153.233 -{
 153.234 -    struct ac_timer *t;
 153.235 -    unsigned long    flags; 
 153.236 -    s_time_t         now = NOW();
 153.237 -    int              i, j;
 153.238 -
 153.239 -    printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
 153.240 -           (u32)(now>>32), (u32)now); 
 153.241 -
 153.242 -    for_each_online_cpu( i )
 153.243 -    {
 153.244 -        printk("CPU[%02d] ", i);
 153.245 -        spin_lock_irqsave(&ac_timers[i].lock, flags);
 153.246 -        for ( j = 1; j <= GET_HEAP_SIZE(ac_timers[i].heap); j++ )
 153.247 -        {
 153.248 -            t = ac_timers[i].heap[j];
 153.249 -            printk ("  %d : %p ex=0x%08X%08X %p\n",
 153.250 -                    j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
 153.251 -        }
 153.252 -        spin_unlock_irqrestore(&ac_timers[i].lock, flags);
 153.253 -        printk("\n");
 153.254 -    }
 153.255 -}
 153.256 -
 153.257 -
 153.258 -void __init ac_timer_init(void)
 153.259 -{
 153.260 -    static struct ac_timer *dummy_heap;
 153.261 -    int i;
 153.262 -
 153.263 -    open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
 153.264 -
 153.265 -    /*
 153.266 -     * All CPUs initially share an empty dummy heap. Only those CPUs that
 153.267 -     * are brought online will be dynamically allocated their own heap.
 153.268 -     */
 153.269 -    SET_HEAP_SIZE(&dummy_heap, 0);
 153.270 -    SET_HEAP_LIMIT(&dummy_heap, 0);
 153.271 -
 153.272 -    for ( i = 0; i < NR_CPUS; i++ )
 153.273 -    {
 153.274 -        spin_lock_init(&ac_timers[i].lock);
 153.275 -        ac_timers[i].heap = &dummy_heap;
 153.276 -    }
 153.277 -
 153.278 -    register_keyhandler('a', dump_timerq, "dump ac_timer queues");
 153.279 -}
 153.280 -
 153.281 -/*
 153.282 - * Local variables:
 153.283 - * mode: C
 153.284 - * c-set-style: "BSD"
 153.285 - * c-basic-offset: 4
 153.286 - * tab-width: 4
 153.287 - * indent-tabs-mode: nil
 153.288 - * End:
 153.289 - */
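
The deleted ac_timer.c is not lost functionality: across this changeset the subsystem is renamed to a generic timer API (note the init_ac_timer -> init_timer change in the vmx.c hunk above). A hedged sketch of the renamed interface in use, assuming set_timer/stop_timer are the counterparts of the removed set_ac_timer/rem_ac_timer:

    static struct timer demo_timer;

    static void demo_timer_fn(void *data)
    {
        struct vcpu *v = data;
        /* ... per-vcpu periodic work ... */
        set_timer(&demo_timer, NOW() + MILLISECS(10));   /* re-arm */
    }

    void demo_timer_start(struct vcpu *v)
    {
        init_timer(&demo_timer, demo_timer_fn, v, v->processor);
        set_timer(&demo_timer, NOW() + MILLISECS(10));
    }
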
   154.1 --- a/xen/common/bitmap.c	Fri Jan 13 10:38:44 2006 -0600
   154.2 +++ b/xen/common/bitmap.c	Fri Jan 13 14:12:24 2006 -0600
   154.3 @@ -282,6 +282,111 @@ int __bitmap_weight(const unsigned long 
   154.4  #endif
   154.5  EXPORT_SYMBOL(__bitmap_weight);
   154.6  
   154.7 +/*
   154.8 + * Bitmap printing & parsing functions: first version by Bill Irwin,
   154.9 + * second version by Paul Jackson, third by Joe Korty.
  154.10 + */
  154.11 +
  154.12 +#define CHUNKSZ				32
  154.13 +#define nbits_to_hold_value(val)	fls(val)
  154.14 +#define roundup_power2(val,modulus)	(((val) + (modulus) - 1) & ~((modulus) - 1))
  154.15 +#define unhex(c)			(isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
  154.16 +#define BASEDEC 10		/* fancier cpuset lists input in decimal */
  154.17 +
  154.18 +/**
  154.19 + * bitmap_scnprintf - convert bitmap to an ASCII hex string.
  154.20 + * @buf: byte buffer into which string is placed
  154.21 + * @buflen: reserved size of @buf, in bytes
  154.22 + * @maskp: pointer to bitmap to convert
  154.23 + * @nmaskbits: size of bitmap, in bits
  154.24 + *
  154.25 + * Exactly @nmaskbits bits are displayed.  Hex digits are grouped into
  154.26 + * comma-separated sets of eight digits per set.
  154.27 + */
  154.28 +int bitmap_scnprintf(char *buf, unsigned int buflen,
  154.29 +	const unsigned long *maskp, int nmaskbits)
  154.30 +{
  154.31 +	int i, word, bit, len = 0;
  154.32 +	unsigned long val;
  154.33 +	const char *sep = "";
  154.34 +	int chunksz;
  154.35 +	u32 chunkmask;
  154.36 +
  154.37 +	chunksz = nmaskbits & (CHUNKSZ - 1);
  154.38 +	if (chunksz == 0)
  154.39 +		chunksz = CHUNKSZ;
  154.40 +
  154.41 +	i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
  154.42 +	for (; i >= 0; i -= CHUNKSZ) {
  154.43 +		chunkmask = ((1ULL << chunksz) - 1);
  154.44 +		word = i / BITS_PER_LONG;
  154.45 +		bit = i % BITS_PER_LONG;
  154.46 +		val = (maskp[word] >> bit) & chunkmask;
  154.47 +		len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
  154.48 +			(chunksz+3)/4, val);
  154.49 +		chunksz = CHUNKSZ;
  154.50 +		sep = ",";
  154.51 +	}
  154.52 +	return len;
  154.53 +}
  154.54 +EXPORT_SYMBOL(bitmap_scnprintf);
  154.55 +
  154.56 +/*
  154.57 + * bscnl_emit(buf, buflen, rbot, rtop, bp)
  154.58 + *
  154.59 + * Helper routine for bitmap_scnlistprintf().  Write decimal number
  154.60 + * or range to buf, suppressing output past buf+buflen, with optional
  154.61 + * comma-prefix.  Return len of what would be written to buf, if it
  154.62 + * all fit.
  154.63 + */
  154.64 +static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
  154.65 +{
  154.66 +	if (len > 0)
  154.67 +		len += scnprintf(buf + len, buflen - len, ",");
  154.68 +	if (rbot == rtop)
  154.69 +		len += scnprintf(buf + len, buflen - len, "%d", rbot);
  154.70 +	else
  154.71 +		len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
  154.72 +	return len;
  154.73 +}
  154.74 +
  154.75 +/**
  154.76 + * bitmap_scnlistprintf - convert bitmap to list format ASCII string
  154.77 + * @buf: byte buffer into which string is placed
  154.78 + * @buflen: reserved size of @buf, in bytes
  154.79 + * @maskp: pointer to bitmap to convert
  154.80 + * @nmaskbits: size of bitmap, in bits
  154.81 + *
  154.82 + * Output format is a comma-separated list of decimal numbers and
  154.83 + * ranges.  Consecutively set bits are shown as two hyphen-separated
  154.84 + * decimal numbers, the smallest and largest bit numbers set in
  154.85 + * the range.  Output format is compatible with the format
  154.86 + * accepted as input by bitmap_parselist().
  154.87 + *
  154.88 + * The return value is the number of characters which would be
  154.89 + * generated for the given input, excluding the trailing '\0', as
  154.90 + * per ISO C99.
  154.91 + */
  154.92 +int bitmap_scnlistprintf(char *buf, unsigned int buflen,
  154.93 +	const unsigned long *maskp, int nmaskbits)
  154.94 +{
  154.95 +	int len = 0;
  154.96 +	/* current bit is 'cur', most recently seen range is [rbot, rtop] */
  154.97 +	int cur, rbot, rtop;
  154.98 +
  154.99 +	rbot = cur = find_first_bit(maskp, nmaskbits);
 154.100 +	while (cur < nmaskbits) {
 154.101 +		rtop = cur;
 154.102 +		cur = find_next_bit(maskp, nmaskbits, cur+1);
 154.103 +		if (cur >= nmaskbits || cur > rtop + 1) {
 154.104 +			len = bscnl_emit(buf, buflen, rbot, rtop, len);
 154.105 +			rbot = cur;
 154.106 +		}
 154.107 +	}
 154.108 +	return len;
 154.109 +}
 154.110 +EXPORT_SYMBOL(bitmap_scnlistprintf);
 154.111 +
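
A usage sketch of the two formatters added above (the values in the comments follow from the documented formats: eight hex digits per 32-bit chunk, and decimal ranges):

    void bitmap_format_demo(void)
    {
        DECLARE_BITMAP(mask, 64);
        char hex[32], list[32];

        bitmap_zero(mask, 64);
        set_bit(0, mask);
        set_bit(1, mask);
        set_bit(2, mask);
        set_bit(40, mask);

        bitmap_scnprintf(hex, sizeof(hex), mask, 64);
        /* hex  == "00000100,00000007" */

        bitmap_scnlistprintf(list, sizeof(list), mask, 64);
        /* list == "0-2,40" */
    }
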
 154.112  /**
 154.113   *	bitmap_find_free_region - find a contiguous aligned mem region
 154.114   *	@bitmap: an array of unsigned longs corresponding to the bitmap
   155.1 --- a/xen/common/dom0_ops.c	Fri Jan 13 10:38:44 2006 -0600
   155.2 +++ b/xen/common/dom0_ops.c	Fri Jan 13 14:12:24 2006 -0600
   155.3 @@ -110,13 +110,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   155.4      switch ( op->cmd )
   155.5      {
   155.6  
   155.7 -    case DOM0_SETDOMAININFO:
   155.8 +    case DOM0_SETVCPUCONTEXT:
   155.9      {
  155.10 -        struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
  155.11 +        struct domain *d = find_domain_by_id(op->u.setvcpucontext.domain);
  155.12          ret = -ESRCH;
  155.13          if ( d != NULL )
  155.14          {
  155.15 -            ret = set_info_guest(d, &op->u.setdomaininfo);
  155.16 +            ret = set_info_guest(d, &op->u.setvcpucontext);
  155.17              put_domain(d);
  155.18          }
  155.19      }
  155.20 @@ -284,11 +284,12 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
  155.21      }
  155.22      break;
  155.23  
  155.24 -    case DOM0_PINCPUDOMAIN:
  155.25 +    case DOM0_SETVCPUAFFINITY:
  155.26      {
  155.27 -        domid_t dom = op->u.pincpudomain.domain;
  155.28 +        domid_t dom = op->u.setvcpuaffinity.domain;
  155.29          struct domain *d = find_domain_by_id(dom);
  155.30          struct vcpu *v;
  155.31 +        cpumask_t new_affinity;
  155.32  
  155.33          if ( d == NULL )
  155.34          {
  155.35 @@ -296,15 +297,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
  155.36              break;
  155.37          }
  155.38          
  155.39 -        if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
  155.40 -             !d->vcpu[op->u.pincpudomain.vcpu] )
  155.41 +        if ( (op->u.setvcpuaffinity.vcpu >= MAX_VIRT_CPUS) ||
  155.42 +             !d->vcpu[op->u.setvcpuaffinity.vcpu] )
  155.43          {
  155.44              ret = -EINVAL;
  155.45              put_domain(d);
  155.46              break;
  155.47          }
  155.48  
  155.49 -        v = d->vcpu[op->u.pincpudomain.vcpu];
  155.50 +        v = d->vcpu[op->u.setvcpuaffinity.vcpu];
  155.51          if ( v == NULL )
  155.52          {
  155.53              ret = -ESRCH;
  155.54 @@ -319,22 +320,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
  155.55              break;
  155.56          }
  155.57  
  155.58 -        v->cpumap = op->u.pincpudomain.cpumap;
  155.59 +        new_affinity = v->cpu_affinity;
  155.60 +        memcpy(cpus_addr(new_affinity),
  155.61 +               &op->u.setvcpuaffinity.cpumap,
  155.62 +               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
  155.63 +                   (int)sizeof(op->u.setvcpuaffinity.cpumap)));
  155.64  
  155.65 -        if ( v->cpumap == CPUMAP_RUNANYWHERE )
  155.66 -        {
  155.67 -            clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
  155.68 -        }
  155.69 -        else
  155.70 -        {
  155.71 -            /* pick a new cpu from the usable map */
  155.72 -            int new_cpu;
  155.73 -            new_cpu = (int)find_first_set_bit(v->cpumap) % num_online_cpus();
  155.74 -            vcpu_pause(v);
  155.75 -            vcpu_migrate_cpu(v, new_cpu);
  155.76 -            set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
  155.77 -            vcpu_unpause(v);
  155.78 -        }
  155.79 +        ret = vcpu_set_affinity(v, &new_affinity);
  155.80  
  155.81          put_domain(d);
  155.82      }
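
The memcpy above (and its mirror in the GETVCPUINFO hunk below) implements a width-adapting copy between the fixed-size dom0_op cpumap field and the kernel's cpumask_t: only the bytes both representations can hold are transferred, so neither side is overrun whatever NR_CPUS is. A sketch of the idiom, assuming the interface field is a 64-bit bitmap (its exact width is not shown here):

    static void cpumap_to_cpumask(cpumask_t *mask, uint64_t cpumap)
    {
        /* Copy min(kernel bitmap size, interface bitmap size) bytes. */
        memcpy(cpus_addr(*mask), &cpumap,
               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
                   (int)sizeof(cpumap)));
    }
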
  155.83 @@ -506,7 +498,11 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
  155.84          op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
  155.85          op->u.getvcpuinfo.cpu_time = v->cpu_time;
  155.86          op->u.getvcpuinfo.cpu      = v->processor;
  155.87 -        op->u.getvcpuinfo.cpumap   = v->cpumap;
  155.88 +        op->u.getvcpuinfo.cpumap   = 0;
  155.89 +        memcpy(&op->u.getvcpuinfo.cpumap,
  155.90 +               cpus_addr(v->cpu_affinity),
  155.91 +               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
  155.92 +                   (int)sizeof(op->u.getvcpuinfo.cpumap)));
  155.93          ret = 0;
  155.94  
  155.95          if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )     
   156.1 --- a/xen/common/domain.c	Fri Jan 13 10:38:44 2006 -0600
   156.2 +++ b/xen/common/domain.c	Fri Jan 13 14:12:24 2006 -0600
   156.3 @@ -46,12 +46,10 @@ struct domain *do_createdomain(domid_t d
   156.4      INIT_LIST_HEAD(&d->page_list);
   156.5      INIT_LIST_HEAD(&d->xenpage_list);
   156.6  
   156.7 -    if ( d->domain_id == IDLE_DOMAIN_ID )
   156.8 -        set_bit(_DOMF_idle_domain, &d->domain_flags);
   156.9 -    else
  156.10 +    if ( !is_idle_domain(d) )
  156.11          set_bit(_DOMF_ctrl_pause, &d->domain_flags);
  156.12  
  156.13 -    if ( !is_idle_task(d) &&
  156.14 +    if ( !is_idle_domain(d) &&
  156.15           ((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
  156.16          goto fail1;
  156.17      
  156.18 @@ -68,7 +66,7 @@ struct domain *do_createdomain(domid_t d
  156.19           (arch_do_createdomain(v) != 0) )
  156.20          goto fail3;
  156.21  
  156.22 -    if ( !is_idle_task(d) )
  156.23 +    if ( !is_idle_domain(d) )
  156.24      {
  156.25          write_lock(&domlist_lock);
  156.26          pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
  156.27 @@ -173,20 +171,23 @@ static void domain_shutdown_finalise(voi
  156.28  
  156.29      BUG_ON(d == NULL);
  156.30      BUG_ON(d == current->domain);
  156.31 -    BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
  156.32 -    BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
  156.33 +
  156.34 +    LOCK_BIGLOCK(d);
  156.35  
  156.36      /* Make sure that every vcpu is descheduled before we finalise. */
  156.37      for_each_vcpu ( d, v )
  156.38          vcpu_sleep_sync(v);
  156.39 -    BUG_ON(!cpus_empty(d->cpumask));
  156.40 +    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
  156.41  
  156.42      sync_pagetable_state(d);
  156.43  
  156.44 -    set_bit(_DOMF_shutdown, &d->domain_flags);
  156.45 -    clear_bit(_DOMF_shuttingdown, &d->domain_flags);
  156.46 +    /* Don't set DOMF_shutdown until execution contexts are sync'ed. */
  156.47 +    if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
  156.48 +        send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
  156.49  
  156.50 -    send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
  156.51 +    UNLOCK_BIGLOCK(d);
  156.52 +
  156.53 +    put_domain(d);
  156.54  }
  156.55  
  156.56  static __init int domain_shutdown_finaliser_init(void)
  156.57 @@ -222,16 +223,17 @@ void domain_shutdown(struct domain *d, u
  156.58  
  156.59      /* Mark the domain as shutting down. */
  156.60      d->shutdown_code = reason;
  156.61 -    if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
  156.62 -    {
  156.63 -        /* This vcpu won the race to finalise the shutdown. */
  156.64 -        domain_shuttingdown[smp_processor_id()] = d;
  156.65 -        raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
  156.66 -    }
  156.67  
  156.68      /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
  156.69      for_each_vcpu ( d, v )
  156.70 +    {
  156.71 +        atomic_inc(&v->pausecnt);
  156.72          vcpu_sleep_nosync(v);
  156.73 +    }
  156.74 +
  156.75 +    get_knownalive_domain(d);
  156.76 +    domain_shuttingdown[smp_processor_id()] = d;
  156.77 +    raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
  156.78  }
  156.79  
  156.80  
  156.81 @@ -357,11 +359,11 @@ void domain_unpause_by_systemcontroller(
  156.82   * of domains other than domain 0. ie. the domains that are being built by 
  156.83   * the userspace dom0 domain builder.
  156.84   */
  156.85 -int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
  156.86 +int set_info_guest(struct domain *d, dom0_setvcpucontext_t *setvcpucontext)
  156.87  {
  156.88      int rc = 0;
  156.89      struct vcpu_guest_context *c = NULL;
  156.90 -    unsigned long vcpu = setdomaininfo->vcpu;
  156.91 +    unsigned long vcpu = setvcpucontext->vcpu;
  156.92      struct vcpu *v; 
  156.93  
  156.94      if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
  156.95 @@ -374,7 +376,7 @@ int set_info_guest(struct domain *d, dom
  156.96          return -ENOMEM;
  156.97  
  156.98      rc = -EFAULT;
  156.99 -    if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) == 0 )
 156.100 +    if ( copy_from_user(c, setvcpucontext->ctxt, sizeof(*c)) == 0 )
 156.101          rc = arch_set_info_guest(v, c);
 156.102  
 156.103      xfree(c);
   157.1 --- a/xen/common/grant_table.c	Fri Jan 13 10:38:44 2006 -0600
   157.2 +++ b/xen/common/grant_table.c	Fri Jan 13 14:12:24 2006 -0600
   157.3 @@ -312,8 +312,6 @@ static int
   157.4      if ( !act->pin )
   157.5          clear_bit(_GTF_reading, &sha->flags);
   157.6  
   157.7 -    spin_unlock(&rd->grant_table->lock);
   157.8 -
   157.9   unlock_out:
  157.10      spin_unlock(&rd->grant_table->lock);
  157.11      (void)__put_user(rc, &uop->status);
  157.12 @@ -471,7 +469,7 @@ gnttab_unmap_grant_ref(
  157.13      for ( i = 0; i < count; i++ )
  157.14          (void)__gnttab_unmap_grant_ref(&uop[i]);
  157.15  
  157.16 -    flush_tlb_mask(current->domain->cpumask);
  157.17 +    flush_tlb_mask(current->domain->domain_dirty_cpumask);
  157.18  
  157.19      return 0;
  157.20  }
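
The first hunk here fixes a double unlock: the fall-through path released rd->grant_table->lock and then fell into unlock_out, which released it again. The surviving shape is the usual single-unlock-site pattern, sketched:

    /* Sketch of the corrected pattern: exactly one unlock site,
     * reached by both the success path and every error goto. */
    static int locked_op(spinlock_t *lock, int ok)
    {
        int rc = 0;

        spin_lock(lock);
        if ( !ok )
        {
            rc = -EINVAL;
            goto unlock_out;
        }
        /* ... update state guarded by the lock ... */
     unlock_out:
        spin_unlock(lock);
        return rc;
    }
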
   158.1 --- a/xen/common/kernel.c	Fri Jan 13 10:38:44 2006 -0600
   158.2 +++ b/xen/common/kernel.c	Fri Jan 13 14:12:24 2006 -0600
   158.3 @@ -11,6 +11,7 @@
   158.4  #include <xen/compile.h>
   158.5  #include <xen/sched.h>
   158.6  #include <asm/current.h>
   158.7 +#include <public/nmi.h>
   158.8  #include <public/version.h>
   158.9  
  158.10  void cmdline_parse(char *cmdline)
  158.11 @@ -148,6 +149,43 @@ long do_xen_version(int cmd, void *arg)
  158.12      return -ENOSYS;
  158.13  }
  158.14  
  158.15 +long do_nmi_op(unsigned int cmd, void *arg)
  158.16 +{
  158.17 +    struct vcpu *v = current;
  158.18 +    struct domain *d = current->domain;
  158.19 +    long rc = 0;
  158.20 +
  158.21 +    switch ( cmd )
  158.22 +    {
  158.23 +    case XENNMI_register_callback:
  158.24 +        if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
   158.25 +        {
   158.26 +            rc = -EINVAL;
  158.27 +        }
  158.28 +        else
  158.29 +        {
  158.30 +            v->nmi_addr = (unsigned long)arg;
  158.31 +#ifdef CONFIG_X86
  158.32 +            /*
  158.33 +             * If no handler was registered we can 'lose the NMI edge'.
  158.34 +             * Re-assert it now.
  158.35 +             */
  158.36 +            if ( d->shared_info->arch.nmi_reason != 0 )
  158.37 +                set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
  158.38 +#endif
  158.39 +        }
  158.40 +        break;
  158.41 +    case XENNMI_unregister_callback:
  158.42 +        v->nmi_addr = 0;
  158.43 +        break;
  158.44 +    default:
  158.45 +        rc = -ENOSYS;
  158.46 +        break;
  158.47 +    }
  158.48 +
  158.49 +    return rc;
  158.50 +}
  158.51 +
  158.52  long do_vm_assist(unsigned int cmd, unsigned int type)
  158.53  {
  158.54      return vm_assist(current->domain, cmd, type);
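do_nmi_op() above accepts a callback address from dom0's vcpu0 only, and re-asserts a pending NMI so a late registration does not miss an edge. A hypothetical guest-side sketch; HYPERVISOR_nmi_op and nmi_entry are assumptions for illustration, not part of this changeset:

    /* Sketch: dom0 vcpu0 registering its NMI trampoline (names assumed). */
    extern void nmi_entry(void);          /* guest NMI entry point */

    static int example_register_nmi(void)
    {
        /* Any caller other than dom0/vcpu0 gets -EINVAL. */
        return HYPERVISOR_nmi_op(XENNMI_register_callback,
                                 (void *)nmi_entry);
    }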
   159.1 --- a/xen/common/keyhandler.c	Fri Jan 13 10:38:44 2006 -0600
   159.2 +++ b/xen/common/keyhandler.c	Fri Jan 13 14:12:24 2006 -0600
   159.3 @@ -97,13 +97,22 @@ static void halt_machine(unsigned char k
   159.4      machine_restart(NULL); 
   159.5  }
   159.6  
   159.7 -static void do_task_queues(unsigned char key)
   159.8 +static void cpuset_print(char *set, int size, cpumask_t mask)
   159.9 +{
  159.10 +    *set++ = '{';
  159.11 +    set += cpulist_scnprintf(set, size-2, mask);
  159.12 +    *set++ = '}';
  159.13 +    *set++ = '\0';
  159.14 +}
  159.15 +
  159.16 +static void dump_domains(unsigned char key)
  159.17  {
  159.18      struct domain *d;
  159.19      struct vcpu   *v;
  159.20      s_time_t       now = NOW();
  159.21 +    char           cpuset[100];
  159.22  
  159.23 -    printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
  159.24 +    printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
  159.25             (u32)(now>>32), (u32)now); 
  159.26  
  159.27      read_lock(&domlist_lock);
  159.28 @@ -111,9 +120,11 @@ static void do_task_queues(unsigned char
  159.29      for_each_domain ( d )
  159.30      {
  159.31          printk("General information for domain %u:\n", d->domain_id);
  159.32 -        printk("    flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d\n",
  159.33 +        cpuset_print(cpuset, sizeof(cpuset), d->domain_dirty_cpumask);
  159.34 +        printk("    flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d "
  159.35 +               "dirty_cpus=%s\n",
  159.36                 d->domain_flags, atomic_read(&d->refcnt),
  159.37 -               d->tot_pages, d->xenheap_pages);
  159.38 +               d->tot_pages, d->xenheap_pages, cpuset);
  159.39          printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
  159.40                 "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
  159.41                 d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
  159.42 @@ -129,12 +140,16 @@ static void do_task_queues(unsigned char
  159.43                 d->domain_id);
  159.44          for_each_vcpu ( d, v ) {
  159.45              printk("    VCPU%d: CPU%d [has=%c] flags=%lx "
  159.46 -                   "upcall_pend = %02x, upcall_mask = %02x\n",
  159.47 +                   "upcall_pend = %02x, upcall_mask = %02x ",
  159.48                     v->vcpu_id, v->processor,
  159.49                     test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
  159.50                     v->vcpu_flags,
  159.51                     v->vcpu_info->evtchn_upcall_pending, 
  159.52                     v->vcpu_info->evtchn_upcall_mask);
  159.53 +            cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask);
  159.54 +            printk("dirty_cpus=%s ", cpuset);
  159.55 +            cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity);
  159.56 +            printk("cpu_affinity=%s\n", cpuset);
  159.57              printk("    Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
  159.58                     VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
  159.59                     test_bit(v->virq_to_evtchn[VIRQ_DEBUG], 
  159.60 @@ -170,6 +185,27 @@ void do_debug_key(unsigned char key, str
  159.61                               bit. */
  159.62  }
  159.63  
  159.64 +void do_nmi_stats(unsigned char key)
  159.65 +{
  159.66 +    int i;
  159.67 +    struct domain *d;
  159.68 +    struct vcpu *v;
  159.69 +    printk("CPU\tNMI\n");
  159.70 +    for_each_cpu(i)
  159.71 +        printk("%3d\t%3d\n", i, nmi_count(i));
  159.72 +
   159.73 +    if ( (d = dom0) == NULL )
   159.74 +        return;
   159.75 +    if ( (v = d->vcpu[0]) == NULL )
   159.76 +        return;
   159.77 +    if ( v->vcpu_flags & (VCPUF_nmi_pending|VCPUF_nmi_masked) )
   159.78 +        printk("dom0 vcpu0: NMI %s%s\n",
   159.79 +               v->vcpu_flags & VCPUF_nmi_pending ? "pending " : "",
   159.80 +               v->vcpu_flags & VCPUF_nmi_masked ? "masked " : "");
   159.81 +    else
   159.82 +        printk("dom0 vcpu0: NMI neither pending nor masked\n");
  159.83 +}
  159.84 +
  159.85  #ifndef NDEBUG
  159.86  void debugtrace_key(unsigned char key)
  159.87  {
  159.88 @@ -193,11 +229,12 @@ void initialize_keytable(void)
  159.89      register_keyhandler(
  159.90          'L', reset_sched_histo, "reset sched latency histogram");
  159.91      register_keyhandler(
  159.92 -        'q', do_task_queues, "dump task queues + guest state");
  159.93 +        'q', dump_domains, "dump domain (and guest debug) info");
  159.94      register_keyhandler(
  159.95          'r', dump_runq,      "dump run queues");
  159.96      register_irq_keyhandler(
  159.97          'R', halt_machine,   "reboot machine"); 
  159.98 +    register_keyhandler('N', do_nmi_stats,   "NMI statistics");
  159.99  
 159.100  #ifndef NDEBUG
 159.101      register_keyhandler(
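The keyhandler changes above rename the 'q' handler and wire a new 'N' key for NMI statistics. Registering a further debug key follows the same shape; a sketch with an assumed free key and handler name:

    /* Sketch: adding a debug-key handler (key 'X' assumed unused). */
    static void do_my_stats(unsigned char key)
    {
        printk("'%c' pressed -> dumping my stats\n", key);
    }

    void example_register(void)
    {
        register_keyhandler('X', do_my_stats, "my statistics");
    }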
   160.1 --- a/xen/common/memory.c	Fri Jan 13 10:38:44 2006 -0600
   160.2 +++ b/xen/common/memory.c	Fri Jan 13 14:12:24 2006 -0600
   160.3 @@ -38,10 +38,7 @@ increase_reservation(
   160.4  
   160.5      if ( (extent_order != 0) &&
   160.6           !multipage_allocation_permitted(current->domain) )
   160.7 -    {
   160.8 -        DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
   160.9          return 0;
  160.10 -    }
  160.11  
  160.12      for ( i = 0; i < nr_extents; i++ )
  160.13      {
   161.1 --- a/xen/common/page_alloc.c	Fri Jan 13 10:38:44 2006 -0600
   161.2 +++ b/xen/common/page_alloc.c	Fri Jan 13 14:12:24 2006 -0600
   161.3 @@ -615,7 +615,7 @@ void free_domheap_pages(struct pfn_info 
   161.4              shadow_drop_references(d, &pg[i]);
   161.5              ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
   161.6              pg[i].tlbflush_timestamp  = tlbflush_current_time();
   161.7 -            pg[i].u.free.cpumask      = d->cpumask;
   161.8 +            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
   161.9              list_del(&pg[i].list);
  161.10          }
  161.11  
   162.1 --- a/xen/common/sched_bvt.c	Fri Jan 13 10:38:44 2006 -0600
   162.2 +++ b/xen/common/sched_bvt.c	Fri Jan 13 14:12:24 2006 -0600
   162.3 @@ -20,7 +20,7 @@
   162.4  #include <xen/delay.h>
   162.5  #include <xen/event.h>
   162.6  #include <xen/time.h>
   162.7 -#include <xen/ac_timer.h>
   162.8 +#include <xen/timer.h>
   162.9  #include <xen/perfc.h>
  162.10  #include <xen/sched-if.h>
  162.11  #include <xen/softirq.h>
  162.12 @@ -31,7 +31,8 @@ struct bvt_vcpu_info
  162.13      struct list_head    run_list;         /* runqueue list pointers */
  162.14      u32                 avt;              /* actual virtual time */
  162.15      u32                 evt;              /* effective virtual time */
  162.16 -    struct vcpu  *vcpu;
  162.17 +    int                 migrated;         /* migrated to a new CPU */
  162.18 +    struct vcpu         *vcpu;
  162.19      struct bvt_dom_info *inf;
  162.20  };
  162.21  
  162.22 @@ -44,9 +45,9 @@ struct bvt_dom_info
  162.23                                               limits*/
  162.24      s32                 warp_value;       /* virtual time warp */
  162.25      s_time_t            warpl;            /* warp limit */
  162.26 -    struct ac_timer     warp_timer;       /* deals with warpl */
  162.27 +    struct timer        warp_timer;       /* deals with warpl */
  162.28      s_time_t            warpu;            /* unwarp time requirement */
  162.29 -    struct ac_timer     unwarp_timer;     /* deals with warpu */
  162.30 +    struct timer        unwarp_timer;     /* deals with warpu */
  162.31  
  162.32      struct bvt_vcpu_info vcpu_inf[MAX_VIRT_CPUS];
  162.33  };
  162.34 @@ -97,9 +98,9 @@ static inline int __task_on_runqueue(str
  162.35  static void warp_timer_fn(void *data)
  162.36  {
  162.37      struct bvt_dom_info *inf = data;
  162.38 -    unsigned int cpu = inf->domain->vcpu[0]->processor;
  162.39 -    
  162.40 -    spin_lock_irq(&schedule_data[cpu].schedule_lock);
  162.41 +    struct vcpu *v = inf->domain->vcpu[0];
  162.42 +
  162.43 +    vcpu_schedule_lock_irq(v);
  162.44  
  162.45      inf->warp = 0;
  162.46  
  162.47 @@ -107,28 +108,28 @@ static void warp_timer_fn(void *data)
  162.48      if ( inf->warpu == 0 )
  162.49      {
  162.50          inf->warpback = 0;
  162.51 -        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);   
  162.52 +        cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);   
  162.53      }
  162.54      
  162.55 -    set_ac_timer(&inf->unwarp_timer, NOW() + inf->warpu);
  162.56 +    set_timer(&inf->unwarp_timer, NOW() + inf->warpu);
  162.57  
  162.58 -    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
  162.59 +    vcpu_schedule_unlock_irq(v);
  162.60  }
  162.61  
  162.62  static void unwarp_timer_fn(void *data)
  162.63  {
  162.64      struct bvt_dom_info *inf = data;
  162.65 -    unsigned int cpu = inf->domain->vcpu[0]->processor;
  162.66 +    struct vcpu *v = inf->domain->vcpu[0];
  162.67  
  162.68 -    spin_lock_irq(&schedule_data[cpu].schedule_lock);
  162.69 +    vcpu_schedule_lock_irq(v);
  162.70  
  162.71      if ( inf->warpback )
  162.72      {
  162.73          inf->warp = 1;
  162.74 -        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);   
  162.75 +        cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);   
  162.76      }
  162.77       
  162.78 -    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
  162.79 +    vcpu_schedule_unlock_irq(v);
  162.80  }
  162.81  
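The warp/unwarp handlers above switch from indexing schedule_data[] with a cached cpu number to a per-vcpu lock helper, which stays correct if the vcpu migrates between reading v->processor and taking the lock. A plausible implementation of such a helper (an assumption here, not quoted from this changeset) re-checks the processor after acquiring the lock:

    /* Sketch: retry until the lock taken matches the vcpu's current cpu. */
    static void example_vcpu_schedule_lock(struct vcpu *v)
    {
        unsigned int cpu;

        for ( ; ; )
        {
            cpu = v->processor;
            spin_lock(&schedule_data[cpu].schedule_lock);
            if ( likely(v->processor == cpu) )
                break;                              /* still on this cpu */
            spin_unlock(&schedule_data[cpu].schedule_lock);  /* raced */
        }
    }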
  162.82  static inline u32 calc_avt(struct vcpu *d, s_time_t now)
  162.83 @@ -167,6 +168,7 @@ static inline u32 calc_evt(struct vcpu *
  162.84  static int bvt_alloc_task(struct vcpu *v)
  162.85  {
  162.86      struct domain *d = v->domain;
  162.87 +    struct bvt_dom_info *inf;
  162.88  
  162.89      if ( (d->sched_priv == NULL) )
  162.90      {
  162.91 @@ -175,32 +177,12 @@ static int bvt_alloc_task(struct vcpu *v
  162.92          memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
  162.93      }
  162.94  
  162.95 -    v->sched_priv = &BVT_INFO(d)->vcpu_inf[v->vcpu_id];
  162.96 -
  162.97 -    BVT_INFO(d)->vcpu_inf[v->vcpu_id].inf = BVT_INFO(d);
  162.98 -    BVT_INFO(d)->vcpu_inf[v->vcpu_id].vcpu = v;
  162.99 -
 162.100 -    return 0;
 162.101 -}
 162.102 +    inf = BVT_INFO(d);
 162.103  
 162.104 -/*
 162.105 - * Add and remove a domain
 162.106 - */
 162.107 -static void bvt_add_task(struct vcpu *v) 
 162.108 -{
 162.109 -    struct bvt_dom_info *inf = BVT_INFO(v->domain);
 162.110 -    struct bvt_vcpu_info *einf = EBVT_INFO(v);
 162.111 -    ASSERT(inf != NULL);
 162.112 -    ASSERT(v   != NULL);
 162.113 +    v->sched_priv = &inf->vcpu_inf[v->vcpu_id];
 162.114  
 162.115 -    /* Allocate per-CPU context if this is the first domain to be added. */
 162.116 -    if ( CPU_INFO(v->processor) == NULL )
 162.117 -    {
 162.118 -        schedule_data[v->processor].sched_priv = xmalloc(struct bvt_cpu_info);
 162.119 -        BUG_ON(CPU_INFO(v->processor) == NULL);
 162.120 -        INIT_LIST_HEAD(RUNQUEUE(v->processor));
 162.121 -        CPU_SVT(v->processor) = 0;
 162.122 -    }
 162.123 +    inf->vcpu_inf[v->vcpu_id].inf  = BVT_INFO(d);
 162.124 +    inf->vcpu_inf[v->vcpu_id].vcpu = v;
 162.125  
 162.126      if ( v->vcpu_id == 0 )
 162.127      {
 162.128 @@ -213,13 +195,30 @@ static void bvt_add_task(struct vcpu *v)
 162.129          inf->warpl       = MILLISECS(2000);
 162.130          inf->warpu       = MILLISECS(1000);
 162.131          /* Initialise the warp timers. */
 162.132 -        init_ac_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
 162.133 -        init_ac_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
 162.134 +        init_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
 162.135 +        init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
 162.136      }
 162.137  
 162.138 -    einf->vcpu = v;
 162.139 +    return 0;
 162.140 +}
 162.141 +
 162.142 +/*
 162.143 + * Add and remove a domain
 162.144 + */
 162.145 +static void bvt_add_task(struct vcpu *v) 
 162.146 +{
 162.147 +    struct bvt_vcpu_info *einf = EBVT_INFO(v);
 162.148  
 162.149 -    if ( is_idle_task(v->domain) )
 162.150 +    /* Allocate per-CPU context if this is the first domain to be added. */
 162.151 +    if ( CPU_INFO(v->processor) == NULL )
 162.152 +    {
 162.153 +        schedule_data[v->processor].sched_priv = xmalloc(struct bvt_cpu_info);
 162.154 +        BUG_ON(CPU_INFO(v->processor) == NULL);
 162.155 +        INIT_LIST_HEAD(RUNQUEUE(v->processor));
 162.156 +        CPU_SVT(v->processor) = 0;
 162.157 +    }
 162.158 +
 162.159 +    if ( is_idle_vcpu(v) )
 162.160      {
 162.161          einf->avt = einf->evt = ~0U;
 162.162          BUG_ON(__task_on_runqueue(v));
 162.163 @@ -250,9 +249,11 @@ static void bvt_wake(struct vcpu *v)
 162.164  
 162.165      /* Set the BVT parameters. AVT should always be updated 
  162.166         if CPU migration occurred.*/
 162.167 -    if ( einf->avt < CPU_SVT(cpu) || 
 162.168 -         unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
 162.169 +    if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
 162.170 +    {
 162.171          einf->avt = CPU_SVT(cpu);
 162.172 +        einf->migrated = 0;
 162.173 +    }
 162.174  
 162.175      /* Deal with warping here. */
 162.176      einf->evt = calc_evt(v, einf->avt);
 162.177 @@ -265,29 +266,51 @@ static void bvt_wake(struct vcpu *v)
 162.178          ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
 162.179          ctx_allow;
 162.180  
 162.181 -    if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
 162.182 +    if ( is_idle_vcpu(curr) || (einf->evt <= curr_evt) )
 162.183          cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
 162.184      else if ( schedule_data[cpu].s_timer.expires > r_time )
 162.185 -        set_ac_timer(&schedule_data[cpu].s_timer, r_time);
 162.186 +        set_timer(&schedule_data[cpu].s_timer, r_time);
 162.187  }
 162.188  
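The wake path above replaces the _VCPUF_cpu_migrated test with a scheduler-private 'migrated' flag, but the catch-up rule itself is unchanged and amounts to:

    on wake:         avt = max(avt, CPU_SVT(cpu))
    after migration: avt = CPU_SVT(cpu)    /* reset to the new cpu's SVT */

i.e. a vcpu that slept has its actual virtual time pulled forward to the cpu's scheduler virtual time, and one arriving from another cpu is simply reset to it, so neither can bank idle time and then starve the other runnable vcpus.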
 162.189  
 162.190  static void bvt_sleep(struct vcpu *v)
 162.191  {
 162.192 -    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
 162.193 +    if ( schedule_data[v->processor].curr == v )
 162.194          cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
 162.195      else  if ( __task_on_runqueue(v) )
 162.196          __del_from_runqueue(v);
 162.197  }
 162.198  
 162.199 +
 162.200 +static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
 162.201 +{
 162.202 +    if ( v == current )
 162.203 +        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
 162.204 +
 162.205 +    vcpu_pause(v);
 162.206 +    v->cpu_affinity = *affinity;
 162.207 +    v->processor = first_cpu(v->cpu_affinity);
 162.208 +    EBVT_INFO(v)->migrated = 1;
 162.209 +    vcpu_unpause(v);
 162.210 +
 162.211 +    return 0;
 162.212 +}
 162.213 +
 162.214 +
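bvt_set_affinity() above pauses the target vcpu, installs the new mask, and moves the vcpu to the first cpu in it, refusing only when asked to move the currently running vcpu off its cpu. A sketch of a caller; the SCHED_OP dispatch name and the cpumask helpers are assumptions for illustration:

    /* Sketch: pin a vcpu to a single cpu via the new hook. */
    static int example_pin_vcpu(struct vcpu *v, unsigned int cpu)
    {
        cpumask_t mask = CPU_MASK_NONE;

        cpu_set(cpu, mask);
        /* Returns -EBUSY if v is current and cpu != v->processor. */
        return SCHED_OP(set_affinity, v, &mask);
    }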
 162.215  /**
 162.216   * bvt_free_task - free BVT private structures for a task
 162.217   * @d:             task
 162.218   */
 162.219  static void bvt_free_task(struct domain *d)
 162.220  {
 162.221 -    ASSERT(d->sched_priv != NULL);
 162.222 -    xfree(d->sched_priv);
 162.223 +    struct bvt_dom_info *inf = BVT_INFO(d);
 162.224 +
 162.225 +    ASSERT(inf != NULL);
 162.226 +
 162.227 +    kill_timer(&inf->warp_timer);
 162.228 +    kill_timer(&inf->unwarp_timer);
 162.229 +
 162.230 +    xfree(inf);
 162.231  }
 162.232  
 162.233  /* Control the scheduler. */
 162.234 @@ -336,10 +359,10 @@ static int bvt_adjdom(
 162.235          inf->warpu = MILLISECS(warpu);
 162.236          
 162.237          /* If the unwarp timer set up it needs to be removed */
 162.238 -        rem_ac_timer(&inf->unwarp_timer);
 162.239 +        stop_timer(&inf->unwarp_timer);
 162.240          /* If we stop warping the warp timer needs to be removed */
 162.241          if ( !warpback )
 162.242 -            rem_ac_timer(&inf->warp_timer);
 162.243 +            stop_timer(&inf->warp_timer);
 162.244      }
 162.245      else if ( cmd->direction == SCHED_INFO_GET )
 162.246      {
 162.247 @@ -380,17 +403,17 @@ static struct task_slice bvt_do_schedule
 162.248      ASSERT(prev_einf != NULL);
 162.249      ASSERT(__task_on_runqueue(prev));
 162.250  
 162.251 -    if ( likely(!is_idle_task(prev->domain)) ) 
 162.252 +    if ( likely(!is_idle_vcpu(prev)) )
 162.253      {
 162.254          prev_einf->avt = calc_avt(prev, now);
 162.255          prev_einf->evt = calc_evt(prev, prev_einf->avt);
 162.256         
 162.257          if(prev_inf->warpback && prev_inf->warpl > 0)
 162.258 -            rem_ac_timer(&prev_inf->warp_timer);
 162.259 +            stop_timer(&prev_inf->warp_timer);
 162.260          
 162.261          __del_from_runqueue(prev);
 162.262          
 162.263 -        if ( domain_runnable(prev) )
 162.264 +        if ( vcpu_runnable(prev) )
 162.265              __add_to_runqueue_tail(prev);
 162.266      }
 162.267  
 162.268 @@ -436,7 +459,7 @@ static struct task_slice bvt_do_schedule
 162.269      }
 162.270      
 162.271      if ( next_einf->inf->warp && next_einf->inf->warpl > 0 )
 162.272 -        set_ac_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);
 162.273 +        set_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);
 162.274     
 162.275      /* Extract the domain pointers from the dom infos */
 162.276      next        = next_einf->vcpu;
 162.277 @@ -471,13 +494,13 @@ static struct task_slice bvt_do_schedule
 162.278      }
 162.279  
 162.280      /* work out time for next run through scheduler */
 162.281 -    if ( is_idle_task(next->domain) ) 
 162.282 +    if ( is_idle_vcpu(next) )
 162.283      {
 162.284          r_time = ctx_allow;
 162.285          goto sched_done;
 162.286      }
 162.287  
 162.288 -    if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
 162.289 +    if ( (next_prime == NULL) || is_idle_vcpu(next_prime) )
 162.290      {
 162.291          /* We have only one runnable task besides the idle task. */
 162.292          r_time = 10 * ctx_allow;     /* RN: random constant */
 162.293 @@ -557,6 +580,7 @@ struct scheduler sched_bvt_def = {
 162.294      .dump_cpu_state = bvt_dump_cpu_state,
 162.295      .sleep          = bvt_sleep,
 162.296      .wake           = bvt_wake,
 162.297 +    .set_affinity   = bvt_set_affinity
 162.298  };
 162.299  
 162.300  /*
   163.1 --- a/xen/common/sched_sedf.c	Fri Jan 13 10:38:44 2006 -0600
   163.2 +++ b/xen/common/sched_sedf.c	Fri Jan 13 14:12:24 2006 -0600
   163.3 @@ -9,7 +9,7 @@
   163.4  #include <xen/sched.h>
   163.5  #include <xen/sched-if.h>
   163.6  #include <public/sched_ctl.h>
   163.7 -#include <xen/ac_timer.h>
   163.8 +#include <xen/timer.h>
   163.9  #include <xen/softirq.h>
  163.10  #include <xen/time.h>
  163.11  
  163.12 @@ -325,22 +325,30 @@ DOMAIN_COMPARER(runq, list, d1->deadl_ab
  163.13      list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
  163.14  }
  163.15  
  163.16 +
  163.17  /* Allocates memory for per domain private scheduling data*/
  163.18 -static int sedf_alloc_task(struct vcpu *d) {
  163.19 -    PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
  163.20 -          d->vcpu_id);
  163.21 -    if (d->domain->sched_priv == NULL) {
  163.22 -        if ((d->domain->sched_priv = 
  163.23 -             xmalloc(struct sedf_dom_info)) == NULL )
  163.24 +static int sedf_alloc_task(struct vcpu *d)
  163.25 +{
  163.26 +    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
  163.27 +          d->domain->domain_id, d->vcpu_id);
  163.28 +
  163.29 +    if ( d->domain->sched_priv == NULL )
  163.30 +    {
  163.31 +        d->domain->sched_priv = xmalloc(struct sedf_dom_info);
  163.32 +        if ( d->domain->sched_priv == NULL )
  163.33              return -1;
  163.34          memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
  163.35      }
  163.36 -    if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
  163.37 +
  163.38 +    if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
  163.39          return -1;
  163.40 +
  163.41      memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
  163.42 +
  163.43      return 0;
  163.44  }
  163.45  
  163.46 +
  163.47  /* Setup the sedf_dom_info */
  163.48  static void sedf_add_task(struct vcpu *d)
  163.49  {
  163.50 @@ -363,14 +371,17 @@ static void sedf_add_task(struct vcpu *d
  163.51          INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
  163.52      }
  163.53         
  163.54 -    if (d->domain->domain_id==0) {
  163.55 +    if ( d->domain->domain_id == 0 )
  163.56 +    {
  163.57          /*set dom0 to something useful to boot the machine*/
  163.58          inf->period    = MILLISECS(20);
  163.59          inf->slice     = MILLISECS(15);
  163.60          inf->latency   = 0;
  163.61          inf->deadl_abs = 0;
  163.62          inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
  163.63 -    } else {
  163.64 +    }
  163.65 +    else
  163.66 +    {
  163.67          /*other domains run in best effort mode*/
  163.68          inf->period    = WEIGHT_PERIOD;
  163.69          inf->slice     = 0;
  163.70 @@ -379,14 +390,18 @@ static void sedf_add_task(struct vcpu *d
  163.71          inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
  163.72          inf->extraweight = 1;
  163.73      }
  163.74 +
  163.75      inf->period_orig = inf->period; inf->slice_orig = inf->slice;
  163.76      INIT_LIST_HEAD(&(inf->list));
  163.77      INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
  163.78      INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
  163.79   
  163.80 -    if (!is_idle_task(d->domain)) {
  163.81 +    if ( !is_idle_vcpu(d) )
  163.82 +    {
  163.83          extraq_check(d);
  163.84 -    } else {
  163.85 +    }
  163.86 +    else
  163.87 +    {
  163.88          EDOM_INFO(d)->deadl_abs = 0;
  163.89          EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
  163.90      }
  163.91 @@ -396,19 +411,28 @@ static void sedf_add_task(struct vcpu *d
  163.92  static void sedf_free_task(struct domain *d)
  163.93  {
  163.94      int i;
  163.95 +
  163.96      PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
  163.97 +
  163.98      ASSERT(d->sched_priv != NULL);
  163.99      xfree(d->sched_priv);
 163.100   
 163.101 -    for (i = 0; i < MAX_VIRT_CPUS; i++)
 163.102 -        if ( d->vcpu[i] ) {
 163.103 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
 163.104 +    {
 163.105 +        if ( d->vcpu[i] )
 163.106 +        {
 163.107              ASSERT(d->vcpu[i]->sched_priv != NULL);
 163.108              xfree(d->vcpu[i]->sched_priv);
 163.109          }
 163.110 +    }
 163.111  }
 163.112  
 163.113 -/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
 163.114 -static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
 163.115 +/*
 163.116 + * Handles the rescheduling & bookkeeping of domains running in their
 163.117 + * guaranteed timeslice.
 163.118 + */
 163.119 +static void desched_edf_dom(s_time_t now, struct vcpu* d)
 163.120 +{
 163.121      struct sedf_vcpu_info* inf = EDOM_INFO(d);
 163.122      /*current domain is running in real time mode*/
 163.123   
 163.124 @@ -418,27 +442,30 @@ static inline void desched_edf_dom (s_ti
 163.125  
 163.126      /*scheduling decisions, which don't remove the running domain
 163.127        from the runq*/
 163.128 -    if ((inf->cputime < inf->slice) && sedf_runnable(d))
 163.129 +    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
 163.130          return;
 163.131    
 163.132      __del_from_queue(d);
 163.133    
 163.134      /*manage bookkeeping (i.e. calculate next deadline,
  163.135        memorize overrun-time of slice) of finished domains*/
 163.136 -    if (inf->cputime >= inf->slice) {
 163.137 +    if ( inf->cputime >= inf->slice )
 163.138 +    {
 163.139          inf->cputime -= inf->slice;
 163.140    
 163.141 -        if (inf->period < inf->period_orig) {
 163.142 +        if ( inf->period < inf->period_orig )
 163.143 +        {
 163.144              /*this domain runs in latency scaling or burst mode*/
 163.145  #if (UNBLOCK == UNBLOCK_BURST)
  163.146              /*if we are running in burst scaling, wait for two periods
 163.147                before scaling periods up again*/ 
 163.148 -            if (now - inf->unblock_abs >= 2 * inf->period)
 163.149 +            if ( (now - inf->unblock_abs) >= (2 * inf->period) )
 163.150  #endif
 163.151              {
 163.152                  inf->period *= 2; inf->slice *= 2;
 163.153 -                if ((inf->period > inf->period_orig) ||
 163.154 -                    (inf->slice > inf->slice_orig)) {
 163.155 +                if ( (inf->period > inf->period_orig) ||
 163.156 +                     (inf->slice > inf->slice_orig) )
 163.157 +                {
 163.158                      /*reset slice & period*/
 163.159                      inf->period = inf->period_orig;
 163.160                      inf->slice = inf->slice_orig;
 163.161 @@ -450,36 +477,46 @@ static inline void desched_edf_dom (s_ti
 163.162      }
 163.163   
 163.164      /*add a runnable domain to the waitqueue*/
 163.165 -    if (sedf_runnable(d))
 163.166 +    if ( sedf_runnable(d) )
 163.167 +    {
 163.168          __add_to_waitqueue_sort(d);
 163.169 -    else {
 163.170 +    }
 163.171 +    else
 163.172 +    {
 163.173          /*we have a blocked realtime task -> remove it from exqs too*/
 163.174  #if (EXTRA > EXTRA_OFF)
 163.175  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.176 -        if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
 163.177 +        if ( extraq_on(d, EXTRA_PEN_Q) )
 163.178 +            extraq_del(d, EXTRA_PEN_Q);
 163.179  #endif
 163.180 -        if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
 163.181 +        if ( extraq_on(d, EXTRA_UTIL_Q) )
 163.182 +            extraq_del(d, EXTRA_UTIL_Q);
 163.183  #endif
 163.184      }
 163.185 +
 163.186      ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
 163.187      ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
 163.188                   sedf_runnable(d)));
 163.189  }
 163.190  
 163.191 +
 163.192  /* Update all elements on the queues */
 163.193 -static inline void update_queues(s_time_t now, struct list_head* runq, 
 163.194 -                                 struct list_head* waitq) {
 163.195 -    struct list_head     *cur,*tmp;
 163.196 +static void update_queues(
 163.197 +    s_time_t now, struct list_head *runq, struct list_head *waitq)
 163.198 +{
 163.199 +    struct list_head     *cur, *tmp;
 163.200      struct sedf_vcpu_info *curinf;
 163.201   
 163.202      PRINT(3,"Updating waitq..\n");
 163.203 +
  163.204      /*check the first elements of the waitqueue to see whether their
  163.205        next period has already started*/
 163.206      list_for_each_safe(cur, tmp, waitq) {
 163.207          curinf = list_entry(cur, struct sedf_vcpu_info, list);
 163.208          PRINT(4,"\tLooking @ dom %i.%i\n",
 163.209                curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
 163.210 -        if (PERIOD_BEGIN(curinf) <= now) {
 163.211 +        if ( PERIOD_BEGIN(curinf) <= now )
 163.212 +        {
 163.213              __del_from_queue(curinf->vcpu);
 163.214              __add_to_runqueue_sort(curinf->vcpu);
 163.215          }
 163.216 @@ -488,13 +525,16 @@ static inline void update_queues(s_time_
 163.217      }
 163.218   
 163.219      PRINT(3,"Updating runq..\n");
 163.220 +
 163.221      /*process the runq, find domains that are on
 163.222        the runqueue which shouldn't be there*/
 163.223      list_for_each_safe(cur, tmp, runq) {
 163.224          curinf = list_entry(cur,struct sedf_vcpu_info,list);
 163.225          PRINT(4,"\tLooking @ dom %i.%i\n",
 163.226                curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
 163.227 -        if (unlikely(curinf->slice == 0)) {
 163.228 +
 163.229 +        if ( unlikely(curinf->slice == 0) )
 163.230 +        {
 163.231              /*ignore domains with empty slice*/
 163.232              PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
 163.233                    curinf->vcpu->domain->domain_id,
 163.234 @@ -504,7 +544,8 @@ static inline void update_queues(s_time_
 163.235              /*move them to their next period*/
 163.236              curinf->deadl_abs += curinf->period;
 163.237              /*ensure that the start of the next period is in the future*/
 163.238 -            if (unlikely(PERIOD_BEGIN(curinf) < now)) {
 163.239 +            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
 163.240 +            {
 163.241                  curinf->deadl_abs += 
 163.242                      (DIV_UP(now - PERIOD_BEGIN(curinf),
 163.243                             curinf->period)) * curinf->period;
 163.244 @@ -513,8 +554,10 @@ static inline void update_queues(s_time_
 163.245              __add_to_waitqueue_sort(curinf->vcpu);
 163.246              continue;
 163.247          }
 163.248 -        if (unlikely((curinf->deadl_abs < now) ||
 163.249 -                     (curinf->cputime > curinf->slice))) {
 163.250 +
 163.251 +        if ( unlikely((curinf->deadl_abs < now) ||
 163.252 +                      (curinf->cputime > curinf->slice)) )
 163.253 +        {
 163.254              /*we missed the deadline or the slice was
  163.255            already finished... might happen because
 163.256                of dom_adj.*/
 163.257 @@ -550,6 +593,7 @@ static inline void update_queues(s_time_
 163.258      PRINT(3,"done updating the queues\n");
 163.259  }
 163.260  
 163.261 +
 163.262  #if (EXTRA > EXTRA_OFF)
 163.263  /* removes a domain from the head of the according extraQ and
 163.264     requeues it at a specified position:
 163.265 @@ -557,9 +601,10 @@ static inline void update_queues(s_time_
 163.266       weighted ext.: insert in sorted list by score
 163.267     if the domain is blocked / has regained its short-block-loss
 163.268     time it is not put on any queue */
 163.269 -static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
 163.270 +static void desched_extra_dom(s_time_t now, struct vcpu* d)
 163.271 +{
 163.272      struct sedf_vcpu_info *inf = EDOM_INFO(d);
 163.273 -    int    i    = extra_get_cur_q(inf);
 163.274 +    int i = extra_get_cur_q(inf);
 163.275   
 163.276  #if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
 163.277      unsigned long         oldscore;
 163.278 @@ -575,14 +620,15 @@ static inline void desched_extra_dom(s_t
 163.279      extraq_del(d, i);
 163.280  
 163.281  #if (EXTRA == EXTRA_ROUNDR)
 163.282 -    if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
 163.283 +    if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
 163.284          /*add to the tail if it is runnable => round-robin*/
 163.285          extraq_add_tail(d, EXTRA_UTIL_Q);
 163.286  #elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
 163.287      /*update the score*/
 163.288 -    oldscore      = inf->score[i];
 163.289 +    oldscore = inf->score[i];
 163.290  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.291 -    if (i == EXTRA_PEN_Q) {
 163.292 +    if ( i == EXTRA_PEN_Q )
 163.293 +    {
 163.294          /*domain was running in L0 extraq*/
 163.295          /*reduce block lost, probably more sophistication here!*/
 163.296          /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
 163.297 @@ -605,12 +651,13 @@ static inline void desched_extra_dom(s_t
 163.298          inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
 163.299              inf->short_block_lost_tot;
 163.300          oldscore = 0;
 163.301 -    } else
 163.302 +    }
 163.303 +    else
 163.304  #endif
 163.305      {
 163.306          /*domain was running in L1 extraq => score is inverse of
 163.307            utilization and is used somewhat incremental!*/
 163.308 -        if (!inf->extraweight)
 163.309 +        if ( !inf->extraweight )
 163.310              /*NB: use fixed point arithmetic with 10 bits*/
 163.311              inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
 163.312                  inf->slice;
 163.313 @@ -619,24 +666,32 @@ static inline void desched_extra_dom(s_t
 163.314                full (ie 100%) utilization is equivalent to 128 extraweight*/
 163.315              inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
 163.316      }
 163.317 +
 163.318   check_extra_queues:
 163.319      /* Adding a runnable domain to the right queue and removing blocked ones*/
 163.320 -    if (sedf_runnable(d)) {
 163.321 +    if ( sedf_runnable(d) )
 163.322 +    {
 163.323          /*add according to score: weighted round robin*/
 163.324          if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
 163.325              ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
 163.326              extraq_add_sort_update(d, i, oldscore);
 163.327      }
 163.328 -    else {
 163.329 +    else
 163.330 +    {
 163.331          /*remove this blocked domain from the waitq!*/
 163.332          __del_from_queue(d);
 163.333  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.334          /*make sure that we remove a blocked domain from the other
 163.335            extraq too*/
 163.336 -        if (i == EXTRA_PEN_Q) {
 163.337 -            if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
 163.338 -        } else {
 163.339 -            if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
 163.340 +        if ( i == EXTRA_PEN_Q )
 163.341 +        {
 163.342 +            if ( extraq_on(d, EXTRA_UTIL_Q) )
 163.343 +                extraq_del(d, EXTRA_UTIL_Q);
 163.344 +        }
 163.345 +        else
 163.346 +        {
 163.347 +            if ( extraq_on(d, EXTRA_PEN_Q) )
 163.348 +                extraq_del(d, EXTRA_PEN_Q);
 163.349          }
 163.350  #endif
 163.351      }
 163.352 @@ -647,16 +702,21 @@ static inline void desched_extra_dom(s_t
 163.353  }
 163.354  #endif
 163.355  
 163.356 -static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
 163.357 -                                                        s_time_t end_xt, struct list_head *extraq[], int cpu) {
 163.358 +
 163.359 +static struct task_slice sedf_do_extra_schedule(
 163.360 +    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
 163.361 +{
 163.362      struct task_slice   ret;
 163.363      struct sedf_vcpu_info *runinf;
 163.364      ASSERT(end_xt > now);
 163.365 +
 163.366      /* Enough time left to use for extratime? */
 163.367 -    if (end_xt - now < EXTRA_QUANTUM)
 163.368 +    if ( end_xt - now < EXTRA_QUANTUM )
 163.369          goto return_idle;
 163.370 +
 163.371  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.372 -    if (!list_empty(extraq[EXTRA_PEN_Q])) {
 163.373 +    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
 163.374 +    {
 163.375          /*we still have elements on the level 0 extraq 
 163.376            => let those run first!*/
 163.377          runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
 163.378 @@ -667,9 +727,12 @@ static inline struct task_slice sedf_do_
 163.379  #ifdef SEDF_STATS
 163.380          runinf->pen_extra_slices++;
 163.381  #endif
 163.382 -    } else
 163.383 +    }
 163.384 +    else
 163.385  #endif
 163.386 -        if (!list_empty(extraq[EXTRA_UTIL_Q])) {
 163.387 +    {
 163.388 +        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
 163.389 +        {
 163.390              /*use elements from the normal extraqueue*/
 163.391              runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
 163.392                                    struct sedf_vcpu_info,
 163.393 @@ -680,6 +743,7 @@ static inline struct task_slice sedf_do_
 163.394          }
 163.395          else
 163.396              goto return_idle;
 163.397 +    }
 163.398  
 163.399      ASSERT(ret.time > 0);
 163.400      ASSERT(sedf_runnable(ret.task));
 163.401 @@ -692,6 +756,8 @@ static inline struct task_slice sedf_do_
 163.402      ASSERT(sedf_runnable(ret.task));
 163.403      return ret;
 163.404  }
 163.405 +
 163.406 +
 163.407  /* Main scheduling function
 163.408     Reasons for calling this function are:
 163.409     -timeslice for the current period used up
 163.410 @@ -699,7 +765,7 @@ static inline struct task_slice sedf_do_
 163.411     -and various others ;) in general: determine which domain to run next*/
 163.412  static struct task_slice sedf_do_schedule(s_time_t now)
 163.413  {
 163.414 -    int                   cpu      = current->processor;
 163.415 +    int                   cpu      = smp_processor_id();
 163.416      struct list_head     *runq     = RUNQ(cpu);
 163.417      struct list_head     *waitq    = WAITQ(cpu);
 163.418  #if (EXTRA > EXTRA_OFF)
 163.419 @@ -711,20 +777,21 @@ static struct task_slice sedf_do_schedul
 163.420      struct task_slice      ret;
 163.421  
  163.422      /*idle tasks don't need any of the following stuff*/
 163.423 -    if (is_idle_task(current->domain))
 163.424 +    if ( is_idle_vcpu(current) )
 163.425          goto check_waitq;
 163.426   
 163.427      /* create local state of the status of the domain, in order to avoid
 163.428         inconsistent state during scheduling decisions, because data for
 163.429 -       domain_runnable is not protected by the scheduling lock!*/
 163.430 -    if(!domain_runnable(current))
 163.431 +       vcpu_runnable is not protected by the scheduling lock!*/
 163.432 +    if ( !vcpu_runnable(current) )
 163.433          inf->status |= SEDF_ASLEEP;
 163.434   
 163.435 -    if (inf->status & SEDF_ASLEEP)
 163.436 +    if ( inf->status & SEDF_ASLEEP )
 163.437          inf->block_abs = now;
 163.438  
 163.439  #if (EXTRA > EXTRA_OFF)
 163.440 -    if (unlikely(extra_runs(inf))) {
 163.441 +    if ( unlikely(extra_runs(inf)) )
 163.442 +    {
 163.443          /*special treatment of domains running in extra time*/
 163.444          desched_extra_dom(now, current);
 163.445      }
 163.446 @@ -739,10 +806,12 @@ static struct task_slice sedf_do_schedul
 163.447      /*now simply pick the first domain from the runqueue, which has the
 163.448        earliest deadline, because the list is sorted*/
 163.449   
 163.450 -    if (!list_empty(runq)) {
 163.451 +    if ( !list_empty(runq) )
 163.452 +    {
 163.453          runinf   = list_entry(runq->next,struct sedf_vcpu_info,list);
 163.454          ret.task = runinf->vcpu;
 163.455 -        if (!list_empty(waitq)) {
 163.456 +        if ( !list_empty(waitq) )
 163.457 +        {
 163.458              waitinf  = list_entry(waitq->next,
 163.459                                    struct sedf_vcpu_info,list);
 163.460              /*rerun scheduler, when scheduled domain reaches it's
 163.461 @@ -751,14 +820,16 @@ static struct task_slice sedf_do_schedul
 163.462              ret.time = MIN(now + runinf->slice - runinf->cputime,
 163.463                             PERIOD_BEGIN(waitinf)) - now;
 163.464          }
 163.465 -        else {
 163.466 +        else
 163.467 +        {
 163.468              ret.time = runinf->slice - runinf->cputime;
 163.469          }
 163.470          CHECK(ret.time > 0);
 163.471          goto sched_done;
 163.472      }
 163.473   
 163.474 -    if (!list_empty(waitq)) {
 163.475 +    if ( !list_empty(waitq) )
 163.476 +    {
 163.477          waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
 163.478          /*we could not find any suitable domain 
 163.479            => look for domains that are aware of extratime*/
 163.480 @@ -771,7 +842,8 @@ static struct task_slice sedf_do_schedul
 163.481  #endif
 163.482          CHECK(ret.time > 0);
 163.483      }
 163.484 -    else {
 163.485 +    else
 163.486 +    {
 163.487          /*this could probably never happen, but one never knows...*/
 163.488          /*it can... imagine a second CPU, which is pure scifi ATM,
 163.489            but one never knows ;)*/
 163.490 @@ -782,11 +854,13 @@ static struct task_slice sedf_do_schedul
 163.491   sched_done: 
  163.492      /*TODO: Do something USEFUL when this happens and find out why it
 163.493        still can happen!!!*/
 163.494 -    if (ret.time<0) {
  163.495 +    if ( ret.time < 0 )
 163.496 +    {
 163.497          printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
 163.498                 ret.time);
 163.499          ret.time = EXTRA_QUANTUM;
 163.500      }
 163.501 +
 163.502      EDOM_INFO(ret.task)->sched_start_abs = now;
 163.503      CHECK(ret.time > 0);
 163.504      ASSERT(sedf_runnable(ret.task));
 163.505 @@ -794,31 +868,37 @@ static struct task_slice sedf_do_schedul
 163.506      return ret;
 163.507  }
 163.508  
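The timeslice computation in sedf_do_schedule() above picks the earlier of "current slice exhausted" and "head of waitqueue starts its period". With assumed numbers (slice 15ms, cputime 5ms, the next waiter's period beginning 8ms from now):

    ret.time = MIN(now + slice - cputime, PERIOD_BEGIN(waitinf)) - now
             = MIN(now + 10ms, now + 8ms) - now
             = 8ms

so the scheduler re-runs when the waiter becomes eligible, even though the running domain still has slice left.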
 163.509 -static void sedf_sleep(struct vcpu *d) {
 163.510 -    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
 163.511 +
 163.512 +static void sedf_sleep(struct vcpu *d)
 163.513 +{
 163.514 +    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
 163.515 +          d->domain->domain_id, d->vcpu_id);
 163.516   
 163.517 -    if (is_idle_task(d->domain))
 163.518 +    if ( is_idle_vcpu(d) )
 163.519          return;
 163.520  
 163.521      EDOM_INFO(d)->status |= SEDF_ASLEEP;
 163.522   
 163.523 -    if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
 163.524 +    if ( schedule_data[d->processor].curr == d )
 163.525 +    {
 163.526          cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 163.527      }
 163.528 -    else  {
 163.529 +    else
 163.530 +    {
 163.531          if ( __task_on_queue(d) )
 163.532              __del_from_queue(d);
 163.533  #if (EXTRA > EXTRA_OFF)
 163.534 -        if (extraq_on(d, EXTRA_UTIL_Q)) 
 163.535 +        if ( extraq_on(d, EXTRA_UTIL_Q) ) 
 163.536              extraq_del(d, EXTRA_UTIL_Q);
 163.537  #endif
 163.538  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.539 -        if (extraq_on(d, EXTRA_PEN_Q))
 163.540 +        if ( extraq_on(d, EXTRA_PEN_Q) )
 163.541              extraq_del(d, EXTRA_PEN_Q);
 163.542  #endif
 163.543      }
 163.544  }
 163.545  
 163.546 +
  163.547  /* This function wakes up a domain, i.e. moves it into the waitqueue.
  163.548   * Things to mention: admission control is taking place nowhere at
  163.549   * the moment, so we can't be sure whether it is safe to wake the domain
 163.550 @@ -890,17 +970,21 @@ static void sedf_sleep(struct vcpu *d) {
 163.551   *     -either behaviour can lead to missed deadlines in other domains as
 163.552   *      opposed to approaches 1,2a,2b
 163.553   */
 163.554 -static inline void unblock_short_vcons
 163.555 -(struct sedf_vcpu_info* inf, s_time_t now) {
 163.556 +#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
 163.557 +static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
 163.558 +{
 163.559      inf->deadl_abs += inf->period;
 163.560      inf->cputime = 0;
 163.561  }
 163.562 +#endif
 163.563  
 163.564 -static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
 163.565 +#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
 163.566 +static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
 163.567  {
 163.568      /*treat blocked time as consumed by the domain*/
 163.569      inf->cputime += now - inf->block_abs; 
 163.570 -    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 163.571 +    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
 163.572 +    {
 163.573          /*we don't have a reasonable amount of time in 
 163.574            our slice left :( => start in next period!*/
 163.575          unblock_short_vcons(inf, now);
 163.576 @@ -910,8 +994,11 @@ static inline void unblock_short_cons(st
 163.577          inf->short_cont++;
 163.578  #endif
 163.579  }
 163.580 -static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
 163.581 -                                                s_time_t now) {
 163.582 +#endif
 163.583 +
 163.584 +static void unblock_short_extra_support(
 163.585 +    struct sedf_vcpu_info* inf, s_time_t now)
 163.586 +{
 163.587      /*this unblocking scheme tries to support the domain, by assigning it
 163.588      a priority in extratime distribution according to the loss of time
 163.589      in this slice due to blocking*/
 163.590 @@ -919,26 +1006,29 @@ static inline void unblock_short_extra_s
 163.591   
 163.592      /*no more realtime execution in this period!*/
 163.593      inf->deadl_abs += inf->period;
 163.594 -    if (likely(inf->block_abs)) {
 163.595 +    if ( likely(inf->block_abs) )
 163.596 +    {
  163.597          /*treat blocked time as consumed by the domain*/
 163.598          /*inf->cputime += now - inf->block_abs;*/
 163.599          /*penalty is time the domain would have
 163.600            had if it continued to run */
 163.601          pen = (inf->slice - inf->cputime);
 163.602 -        if (pen < 0) pen = 0;
 163.603 +        if ( pen < 0 )
 163.604 +            pen = 0;
 163.605          /*accumulate all penalties over the periods*/
 163.606          /*inf->short_block_lost_tot += pen;*/
 163.607          /*set penalty to the current value*/
 163.608          inf->short_block_lost_tot = pen;
 163.609          /*not sure which one is better.. but seems to work well...*/
 163.610    
 163.611 -        if (inf->short_block_lost_tot) {
 163.612 +        if ( inf->short_block_lost_tot )
 163.613 +        {
 163.614              inf->score[0] = (inf->period << 10) /
 163.615                  inf->short_block_lost_tot;
 163.616  #ifdef SEDF_STATS
 163.617              inf->pen_extra_blocks++;
 163.618  #endif
 163.619 -            if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
 163.620 +            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
 163.621                  /*remove domain for possible resorting!*/
 163.622                  extraq_del(inf->vcpu, EXTRA_PEN_Q);
 163.623              else
 163.624 @@ -951,36 +1041,53 @@ static inline void unblock_short_extra_s
 163.625              extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
 163.626          }
 163.627      }
 163.628 +
 163.629      /*give it a fresh slice in the next period!*/
 163.630      inf->cputime = 0;
 163.631  }
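The compensation score set above uses 10-bit fixed point: score = (period << 10) / short_block_lost_tot. With an assumed period of 20 (in some time unit), a domain that lost 10 scores (20 << 10) / 10 = 2048, while one that lost only 5 scores 4096; since lower scores take priority on the L0 extraq (see the comparison rules later in this file), the domain with the larger loss is compensated first.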
 163.632 -static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
 163.633 +
 163.634 +
 163.635 +#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
 163.636 +static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
 163.637  {
 163.638      /* align to next future period */
 163.639      inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
 163.640          * inf->period;
 163.641      inf->cputime = 0;
 163.642  }
 163.643 +#endif
 163.644  
 163.645 -static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
 163.646 -                                        s_time_t now) {
 163.647 +
 163.648 +#if 0
  163.649 +static void unblock_long_cons_a(struct sedf_vcpu_info* inf, s_time_t now)
 163.650 +{
 163.651      /*treat the time the domain was blocked in the
 163.652 -   CURRENT period as consumed by the domain*/
 163.653 +     CURRENT period as consumed by the domain*/
 163.654      inf->cputime = (now - inf->deadl_abs) % inf->period; 
 163.655 -    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 163.656 +    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
 163.657 +    {
 163.658          /*we don't have a reasonable amount of time in our slice
 163.659            left :( => start in next period!*/
 163.660          unblock_long_vcons(inf, now);
 163.661      }
 163.662  }
 163.663 -static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
 163.664 +#endif
 163.665 +
 163.666 +
  163.667 +static void unblock_long_cons_b(struct sedf_vcpu_info* inf, s_time_t now)
 163.668 +{
 163.669      /*Conservative 2b*/
 163.670      /*Treat the unblocking time as a start of a new period */
 163.671      inf->deadl_abs = now + inf->period;
 163.672      inf->cputime = 0;
 163.673  }
 163.674 -static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
 163.675 -    if (likely(inf->latency)) {
 163.676 +
 163.677 +
 163.678 +#if (UNBLOCK == UNBLOCK_ATROPOS)
  163.679 +static void unblock_long_cons_c(struct sedf_vcpu_info* inf, s_time_t now)
 163.680 +{
 163.681 +    if ( likely(inf->latency) )
 163.682 +    {
 163.683          /*scale the slice and period accordingly to the latency hint*/
 163.684          /*reduce period temporarily to the latency hint*/
 163.685          inf->period = inf->latency;
 163.686 @@ -993,18 +1100,24 @@ static inline void unblock_long_cons_c(s
 163.687          inf->deadl_abs = now + inf->period;
 163.688          inf->cputime = 0;
 163.689      } 
 163.690 -    else {
 163.691 +    else
 163.692 +    {
 163.693          /*we don't have a latency hint.. use some other technique*/
 163.694          unblock_long_cons_b(inf, now);
 163.695      }
 163.696  }
 163.697 +#endif
 163.698 +
 163.699 +
 163.700 +#if (UNBLOCK == UNBLOCK_BURST)
 163.701  /*a new idea of dealing with short blocks: burst period scaling*/
 163.702 -static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
 163.703 +static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
 163.704  {
 163.705      /*treat blocked time as consumed by the domain*/
 163.706      inf->cputime += now - inf->block_abs;
 163.707   
 163.708 -    if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
 163.709 +    if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
 163.710 +    {
 163.711          /*if we can still use some time in the current slice
 163.712            then use it!*/
 163.713  #ifdef SEDF_STATS
 163.714 @@ -1012,10 +1125,12 @@ static inline void unblock_short_burst(s
 163.715          inf->short_cont++;
 163.716  #endif
 163.717      }
 163.718 -    else {
 163.719 +    else
 163.720 +    {
 163.721          /*we don't have a reasonable amount of time in
 163.722            our slice left => switch to burst mode*/
 163.723 -        if (likely(inf->unblock_abs)) {
 163.724 +        if ( likely(inf->unblock_abs) )
 163.725 +        {
 163.726              /*set the period-length to the current blocking
 163.727                interval, possible enhancements: average over last
 163.728                blocking intervals, user-specified minimum,...*/
 163.729 @@ -1030,17 +1145,23 @@ static inline void unblock_short_burst(s
 163.730              /*set new (shorter) deadline*/
 163.731              inf->deadl_abs += inf->period;
 163.732          }
 163.733 -        else {
 163.734 +        else
 163.735 +        {
 163.736              /*in case we haven't unblocked before
 163.737                start in next period!*/
 163.738              inf->cputime=0;
 163.739              inf->deadl_abs += inf->period;
 163.740          }
 163.741      }
 163.742 +
 163.743      inf->unblock_abs = now;
 163.744  }
 163.745 -static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
 163.746 -    if (unlikely(inf->latency && (inf->period > inf->latency))) {
 163.747 +
 163.748 +
 163.749 +static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
 163.750 +{
 163.751 +    if ( unlikely(inf->latency && (inf->period > inf->latency)) )
 163.752 +    {
 163.753          /*scale the slice and period accordingly to the latency hint*/
 163.754          inf->period = inf->latency;
 163.755          /*check for overflows on multiplication*/
 163.756 @@ -1052,23 +1173,28 @@ static inline void unblock_long_burst(st
 163.757          inf->deadl_abs = now + inf->period;
 163.758          inf->cputime = 0;
 163.759      }
 163.760 -    else {
 163.761 +    else
 163.762 +    {
 163.763          /*we don't have a latency hint.. or we are currently in 
 163.764            "burst mode": use some other technique
 163.765            NB: this should be in fact the normal way of operation,
 163.766            when we are in sync with the device!*/
 163.767          unblock_long_cons_b(inf, now);
 163.768      }
 163.769 +
 163.770      inf->unblock_abs = now;
 163.771  }
 163.772 +#endif /* UNBLOCK == UNBLOCK_BURST */
 163.773 +
 163.774  
 163.775  #define DOMAIN_EDF   1
 163.776  #define DOMAIN_EXTRA_PEN  2
 163.777  #define DOMAIN_EXTRA_UTIL  3
 163.778  #define DOMAIN_IDLE   4
 163.779 -static inline int get_run_type(struct vcpu* d) {
 163.780 +static inline int get_run_type(struct vcpu* d)
 163.781 +{
 163.782      struct sedf_vcpu_info* inf = EDOM_INFO(d);
 163.783 -    if (is_idle_task(d->domain))
 163.784 +    if (is_idle_vcpu(d))
 163.785          return DOMAIN_IDLE;
 163.786      if (inf->status & EXTRA_RUN_PEN)
 163.787          return DOMAIN_EXTRA_PEN;
 163.788 @@ -1076,6 +1202,8 @@ static inline int get_run_type(struct vc
 163.789          return DOMAIN_EXTRA_UTIL;
 163.790      return DOMAIN_EDF;
 163.791  }
 163.792 +
 163.793 +
  163.794  /*Compares two domains to decide whether one is allowed to
  163.795    interrupt the other's execution.
 163.796    It returns true (!=0) if a switch to the other domain is good.
 163.797 @@ -1085,8 +1213,10 @@ static inline int get_run_type(struct vc
 163.798    In the same class priorities are assigned as following:
 163.799     EDF: early deadline > late deadline
 163.800     L0 extra-time: lower score > higher score*/
 163.801 -static inline int should_switch(struct vcpu* cur,
 163.802 -                                struct vcpu* other, s_time_t now) {
 163.803 +static inline int should_switch(struct vcpu *cur,
 163.804 +                                struct vcpu *other,
 163.805 +                                s_time_t now)
 163.806 +{
 163.807      struct sedf_vcpu_info *cur_inf, *other_inf;
 163.808      cur_inf   = EDOM_INFO(cur);
 163.809      other_inf = EDOM_INFO(other);
 163.810 @@ -1119,41 +1249,51 @@ static inline int should_switch(struct v
 163.811      }
 163.812      return 1;
 163.813  }
 163.814 -void sedf_wake(struct vcpu *d) {
 163.815 +
 163.816 +void sedf_wake(struct vcpu *d)
 163.817 +{
 163.818      s_time_t              now = NOW();
 163.819      struct sedf_vcpu_info* inf = EDOM_INFO(d);
 163.820  
 163.821      PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
 163.822            d->vcpu_id);
 163.823  
 163.824 -    if (unlikely(is_idle_task(d->domain)))
 163.825 +    if ( unlikely(is_idle_vcpu(d)) )
 163.826          return;
 163.827     
 163.828 -    if ( unlikely(__task_on_queue(d)) ) {
 163.829 +    if ( unlikely(__task_on_queue(d)) )
 163.830 +    {
 163.831          PRINT(3,"\tdomain %i.%i is already in some queue\n",
 163.832                d->domain->domain_id, d->vcpu_id);
 163.833          return;
 163.834      }
 163.835 +
 163.836      ASSERT(!sedf_runnable(d));
 163.837      inf->status &= ~SEDF_ASLEEP;
 163.838      ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
 163.839      ASSERT(!extraq_on(d, EXTRA_PEN_Q));
 163.840   
 163.841 -    if (unlikely(inf->deadl_abs == 0))
 163.842 +    if ( unlikely(inf->deadl_abs == 0) )
 163.843 +    {
 163.844          /*initial setup of the deadline*/
 163.845          inf->deadl_abs = now + inf->slice;
 163.846 +    }
 163.847    
 163.848 -    PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 163.849 -          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 163.850 -          inf->period, now);
 163.851 +    PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
  163.852 +          " now= %"PRIu64")\n",
 163.853 +          d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
 163.854 +
 163.855  #ifdef SEDF_STATS 
 163.856      inf->block_tot++;
 163.857  #endif
 163.858 -    if (unlikely(now < PERIOD_BEGIN(inf))) {
 163.859 +
 163.860 +    if ( unlikely(now < PERIOD_BEGIN(inf)) )
 163.861 +    {
 163.862          PRINT(4,"extratime unblock\n");
 163.863          /* unblocking in extra-time! */
 163.864  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 163.865 -        if (inf->status & EXTRA_WANT_PEN_Q) {
 163.866 +        if ( inf->status & EXTRA_WANT_PEN_Q )
 163.867 +        {
 163.868              /*we have a domain that wants compensation
 163.869                for block penalty and did just block in
 163.870                its compensation time. Give it another
 163.871 @@ -1163,8 +1303,10 @@ void sedf_wake(struct vcpu *d) {
 163.872  #endif
 163.873          extraq_check_add_unblocked(d, 0);
 163.874      }  
 163.875 -    else {  
 163.876 -        if (now < inf->deadl_abs) {
 163.877 +    else
 163.878 +    {  
 163.879 +        if ( now < inf->deadl_abs )
 163.880 +        {
 163.881              PRINT(4,"short unblocking\n");
 163.882              /*short blocking*/
 163.883  #ifdef SEDF_STATS
 163.884 @@ -1182,7 +1324,8 @@ void sedf_wake(struct vcpu *d) {
 163.885  
 163.886              extraq_check_add_unblocked(d, 1);
 163.887          }
 163.888 -        else {
 163.889 +        else
 163.890 +        {
 163.891              PRINT(4,"long unblocking\n");
 163.892              /*long unblocking*/
 163.893  #ifdef SEDF_STATS
 163.894 @@ -1197,7 +1340,6 @@ void sedf_wake(struct vcpu *d) {
 163.895              unblock_long_cons_c(inf, now);
 163.896  #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
 163.897              unblock_long_cons_b(inf, now);
 163.898 -            /*unblock_short_cons_c(inf, now);*/
 163.899  #elif (UNBLOCK == UNBLOCK_BURST)
 163.900              unblock_long_burst(inf, now);
 163.901  #endif
 163.902 @@ -1205,26 +1347,33 @@ void sedf_wake(struct vcpu *d) {
 163.903              extraq_check_add_unblocked(d, 1);
 163.904          }
 163.905      }
 163.906 -    PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 163.907 -          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 163.908 +
 163.909 +    PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
 163.910 +          " now= %"PRIu64")\n",
 163.911 +          d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 163.912            inf->period, now);
 163.913 -    if (PERIOD_BEGIN(inf) > now) {
 163.914 +
 163.915 +    if ( PERIOD_BEGIN(inf) > now )
 163.916 +    {
 163.917          __add_to_waitqueue_sort(d);
 163.918          PRINT(3,"added to waitq\n");
 163.919      }
 163.920 -    else {
 163.921 +    else
 163.922 +    {
 163.923          __add_to_runqueue_sort(d);
 163.924          PRINT(3,"added to runq\n");
 163.925      }
 163.926   
 163.927  #ifdef SEDF_STATS
 163.928      /*do some statistics here...*/
 163.929 -    if (inf->block_abs != 0) {
 163.930 +    if ( inf->block_abs != 0 )
 163.931 +    {
 163.932          inf->block_time_tot += now - inf->block_abs;
 163.933          inf->penalty_time_tot +=
 163.934              PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
 163.935      }
 163.936  #endif
 163.937 +
 163.938      /*sanity check: make sure each extra-aware domain IS on the util-q!*/
 163.939      ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
 163.940      ASSERT(__task_on_queue(d));
 163.941 @@ -1234,27 +1383,48 @@ void sedf_wake(struct vcpu *d) {
 163.942      ASSERT(d->processor >= 0);
 163.943      ASSERT(d->processor < NR_CPUS);
 163.944      ASSERT(schedule_data[d->processor].curr);
 163.945 -    if (should_switch(schedule_data[d->processor].curr, d, now))
 163.946 +
 163.947 +    if ( should_switch(schedule_data[d->processor].curr, d, now) )
 163.948          cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 163.949  }
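In summary, sedf_wake() splits an unblocking vcpu into three cases depending on where `now` falls relative to the vcpu's period and deadline. A minimal illustrative helper (not in the patch) that makes the case split explicit:

    /* Illustration only: the three unblock cases distinguished above. */
    enum wake_case { WAKE_EXTRA, WAKE_SHORT, WAKE_LONG };

    static enum wake_case classify_wake(struct sedf_vcpu_info *inf, s_time_t now)
    {
        if ( now < PERIOD_BEGIN(inf) )  /* before the next period: extra-time */
            return WAKE_EXTRA;          /* unblock, only extra queues apply   */
        if ( now < inf->deadl_abs )     /* deadline still ahead: short block, */
            return WAKE_SHORT;          /* keep (a variant of) the deadline   */
        return WAKE_LONG;               /* deadline missed while blocked: a   */
                                        /* fresh deadline is derived by the   */
                                        /* compile-time UNBLOCK strategy      */
    }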
 163.950  
 163.951 -/*Print a lot of use-{full, less} information about a domains in the system*/
 163.952 -static void sedf_dump_domain(struct vcpu *d) {
 163.953 +
 163.954 +static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
 163.955 +{
 163.956 +    if ( v == current )
 163.957 +        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
 163.958 +
 163.959 +    vcpu_pause(v);
 163.960 +    v->cpu_affinity = *affinity;
 163.961 +    v->processor = first_cpu(v->cpu_affinity);
 163.962 +    vcpu_unpause(v);
 163.963 +
 163.964 +    return 0;
 163.965 +}
 163.966 +
 163.967 +
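The new affinity hook refuses to migrate the currently running vcpu (returning -EBUSY unless its processor is already in the mask) and otherwise pauses the vcpu before rewriting its affinity and processor, so the move never races with the vcpu executing. A hypothetical caller pinning a vcpu to CPU 2, using the cpumask helpers of the era (sketch; `v` is some non-running vcpu):

    /* Hypothetical usage sketch, not part of the patch. */
    cpumask_t mask;
    cpus_clear(mask);
    cpu_set(2, mask);
    if ( sedf_set_affinity(v, &mask) != 0 )
        printk("cannot move the running vcpu off its processor\n");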
 163.968 +/* Print a lot of useful information about a domain in the system */
 163.969 +static void sedf_dump_domain(struct vcpu *d)
 163.970 +{
 163.971      printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
 163.972             test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
 163.973 -    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
 163.974 +    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
 163.975 +           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
 163.976             EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
 163.977 -           EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
 163.978 +           EDOM_INFO(d)->weight, d->cpu_time,
 163.979 +           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
 163.980             (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
 163.981             EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
 163.982 -    if (d->cpu_time !=0)
 163.983 +    
 163.984 +    if ( d->cpu_time != 0 )
 163.985          printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
 163.986                 / d->cpu_time);
 163.987 +
 163.988  #ifdef SEDF_STATS
 163.989 -    if (EDOM_INFO(d)->block_time_tot!=0)
 163.990 +    if ( EDOM_INFO(d)->block_time_tot != 0 )
 163.991          printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
 163.992                 EDOM_INFO(d)->block_time_tot);
 163.993 -    if (EDOM_INFO(d)->block_tot!=0)
 163.994 +    if ( EDOM_INFO(d)->block_tot != 0 )
 163.995          printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
 163.996                 "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
 163.997                 EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
 163.998 @@ -1271,7 +1441,8 @@ static void sedf_dump_domain(struct vcpu
 163.999      printf("\n");
163.1000  }
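The percentages printed above use integer division, so they truncate toward zero. A worked example with made-up numbers:

    /* extra_time_tot = 333, cpu_time = 1000:
     *     (333 * 100) / 1000 = 33     -> printed as "(33%)"
     * penalty_time_tot = 50, block_time_tot = 400:
     *     (50 * 100) / 400  = 12      -> printed as "pen=12%"
     *                                    (12.5% truncates to 12%)  */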
163.1001  
163.1002 -/*dumps all domains on hte s