ia64/xen-unstable

changeset 10324:9d86c1a70f34

Merged.
author emellor@leeni.uk.xensource.com
date Wed Jun 07 11:03:51 2006 +0100 (2006-06-07)
parents b09dbe439169 e5c17d2d85a4
children ec903b8e6612
files linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Wed Jun 07 11:03:15 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Wed Jun 07 11:03:51 2006 +0100
     1.3 @@ -173,7 +173,7 @@ ENTRY(cpu_gdt_table)
     1.4  	.ascii	         "|pae_pgdir_above_4gb"
     1.5  	.ascii	         "|supervisor_mode_kernel"
     1.6  #ifdef CONFIG_X86_PAE
     1.7 -	.ascii	",PAE=yes"
     1.8 +	.ascii	",PAE=yes[extended-cr3]"
     1.9  #else
    1.10  	.ascii	",PAE=no"
    1.11  #endif
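
Note: the "[extended-cr3]" tag appended to the guest-capabilities string advertises that this kernel understands Xen's extended cr3 format, which allows a PAE page directory to live above 4GB; the smpboot.c and system.h hunks below switch the actual cr3 handling over to it. A minimal sketch of the encoding, assuming the definitions in Xen's public 32-bit interface headers (verify against xen/include/public/arch-x86_32.h):

	/* Extended-cr3 packs a 32-bit frame number into cr3 by folding its
	 * top 12 bits into the low 12 bits of the register, which a legacy
	 * cr3 value leaves as zero. Sketch only. */
	#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
	#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
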
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c	Wed Jun 07 11:03:15 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c	Wed Jun 07 11:03:51 2006 +0100
     2.3 @@ -132,7 +132,9 @@ struct pt_regs * fastcall save_v86_state
     2.4  	current->thread.sysenter_cs = __KERNEL_CS;
     2.5  	load_esp0(tss, &current->thread);
     2.6  	current->thread.saved_esp0 = 0;
     2.7 +#ifndef CONFIG_X86_NO_TSS
     2.8  	put_cpu();
     2.9 +#endif
    2.10  
    2.11  	loadsegment(fs, current->thread.saved_fs);
    2.12  	loadsegment(gs, current->thread.saved_gs);
    2.13 @@ -310,7 +312,9 @@ static void do_sys_vm86(struct kernel_vm
    2.14  	if (cpu_has_sep)
    2.15  		tsk->thread.sysenter_cs = 0;
    2.16  	load_esp0(tss, &tsk->thread);
    2.17 +#ifndef CONFIG_X86_NO_TSS
    2.18  	put_cpu();
    2.19 +#endif
    2.20  
    2.21  	tsk->thread.screen_bitmap = info->screen_bitmap;
    2.22  	if (info->flags & VM86_SCREEN_BITMAP)
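
Note: Xen builds set CONFIG_X86_NO_TSS (the hypervisor owns the TSS), so the get_cpu() that normally accompanies these put_cpu() calls is presumably already compiled out where the per-CPU init_tss is fetched; guarding put_cpu() the same way keeps the preemption count balanced. The assumed shape of the pairing, for illustration only:

	#ifndef CONFIG_X86_NO_TSS
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
	#endif
		/* ... load_esp0(tss, &current->thread); ... */
	#ifndef CONFIG_X86_NO_TSS
		put_cpu();	/* re-enable preemption iff get_cpu() ran */
	#endif
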
     3.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Wed Jun 07 11:03:15 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Wed Jun 07 11:03:51 2006 +0100
     3.3 @@ -558,15 +558,11 @@ void __init paging_init(void)
     3.4  
     3.5  	kmap_init();
     3.6  
     3.7 -	if (!xen_feature(XENFEAT_auto_translated_physmap) ||
     3.8 -	    xen_start_info->shared_info >= xen_start_info->nr_pages) {
     3.9 -		/* Switch to the real shared_info page, and clear the
    3.10 -		 * dummy page. */
    3.11 -		set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
    3.12 -		HYPERVISOR_shared_info =
    3.13 -			(shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
    3.14 -		memset(empty_zero_page, 0, sizeof(empty_zero_page));
    3.15 -	}
    3.16 +	/* Switch to the real shared_info page, and clear the
    3.17 +	 * dummy page. */
    3.18 +	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
    3.19 +	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
    3.20 +	memset(empty_zero_page, 0, sizeof(empty_zero_page));
    3.21  
    3.22  	/* Setup mapping of lower 1st MB */
    3.23  	for (i = 0; i < NR_FIX_ISAMAPS; i++)
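
Note: before this point HYPERVISOR_shared_info refers to a dummy page (empty_zero_page, by the look of the memset that scrubs it afterwards); this hunk makes the switch to the real shared_info machine frame unconditional instead of skipping it for auto-translated guests. The same simplification repeats in the x86_64 setup-xen.c, init-xen.c, and setup_arch_post.h hunks below. Annotated sketch of the sequence (the dummy-page role of empty_zero_page is an inference, not stated in this changeset):

	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);	/* map the machine frame */
	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
	memset(empty_zero_page, 0, sizeof(empty_zero_page));	/* scrub the ex-dummy page */
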
     4.1 --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig	Wed Jun 07 11:03:15 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig	Wed Jun 07 11:03:51 2006 +0100
     4.3 @@ -73,7 +73,7 @@ config XEN_IA64_DOM0_VP
     4.4  
     4.5  config XEN_IA64_DOM0_NON_VP
     4.6  	bool
     4.7 -	depends on !(XEN && XEN_IA64_DOM0_VP)
     4.8 +	depends on XEN && !XEN_IA64_DOM0_VP
     4.9  	default y
    4.10  	help
    4.11  	  dom0 P=M model
    4.12 @@ -496,15 +496,39 @@ source "security/Kconfig"
    4.13  
    4.14  source "crypto/Kconfig"
    4.15  
    4.16 +#
    4.17  # override default values of drivers/xen/Kconfig
    4.18 -if !XEN_IA64_DOM0_VP
    4.19 +#
    4.20 +if XEN
    4.21 +config XEN_UTIL
    4.22 +	default n if XEN_IA64_DOM0_VP
    4.23 +
    4.24  config HAVE_ARCH_ALLOC_SKB
    4.25 -        bool
    4.26 -        default n
    4.27 +	default n if !XEN_IA64_DOM0_VP
    4.28  
    4.29  config HAVE_ARCH_DEV_ALLOC_SKB
    4.30 -        bool
    4.31 -        default n
    4.32 +	default n if !XEN_IA64_DOM0_VP
    4.33 +
    4.34 +config XEN_BALLOON
    4.35 +	default n if !XEN_IA64_DOM0_VP
    4.36 +
    4.37 +config XEN_SKBUFF
    4.38 +	default n if !XEN_IA64_DOM0_VP
    4.39 +
    4.40 +config XEN_NETDEV_BACKEND
    4.41 +	default n if !XEN_IA64_DOM0_VP
    4.42 +
    4.43 +config XEN_NETDEV_FRONTEND
    4.44 +	default n if !XEN_IA64_DOM0_VP
    4.45 +
    4.46 +config XEN_DEVMEM
    4.47 +	default n
    4.48 +
    4.49 +config XEN_REBOOT
    4.50 +	default n
    4.51 +
    4.52 +config XEN_SMPBOOT
    4.53 +	default n
    4.54  endif
    4.55  
    4.56  source "drivers/xen/Kconfig"
     5.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre	Wed Jun 07 11:03:15 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre	Wed Jun 07 11:03:51 2006 +0100
     5.3 @@ -10,12 +10,6 @@
     5.4  #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
     5.5  ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
     5.6  
     5.7 -#ia64 drivers/xen isn't fully functional yet, workaround...
     5.8 -#also ignore core/evtchn.c which uses a different irq mechanism than ia64
     5.9 -#(warning: there be dragons here if these files diverge)
    5.10 -ln -sf ../../arch/ia64/xen/drivers/Makefile drivers/xen/Makefile
    5.11 -ln -sf ../../../arch/ia64/xen/drivers/coreMakefile drivers/xen/core/Makefile
    5.12 -
    5.13  #not sure where these ia64-specific files will end up in the future
    5.14  ln -sf ../../../arch/ia64/xen/drivers/xenia64_init.c drivers/xen/core
    5.15  
     6.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile	Wed Jun 07 11:03:15 2006 +0100
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,22 +0,0 @@
     6.4 -
     6.5 -ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
     6.6 -obj-y   += util.o
     6.7 -endif
     6.8 -
     6.9 -obj-y	+= core/
    6.10 -#obj-y	+= char/
    6.11 -obj-y	+= console/
    6.12 -obj-y	+= evtchn/
    6.13 -obj-$(CONFIG_XEN_IA64_DOM0_VP)	+= balloon/
    6.14 -obj-y	+= privcmd/
    6.15 -obj-y	+= xenbus/
    6.16 -
    6.17 -obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
    6.18 -obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
    6.19 -obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmback/
    6.20 -obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
    6.21 -obj-$(CONFIG_XEN_NETDEV_FRONTEND)	+= netfront/
    6.22 -obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
    6.23 -obj-$(CONFIG_XEN_TPMDEV_FRONTEND)	+= tpmfront/
    6.24 -obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
    6.25 -obj-$(CONFIG_XEN_PCIDEV_FRONTEND)	+= pcifront/
     7.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile	Wed Jun 07 11:03:15 2006 +0100
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,20 +0,0 @@
     7.4 -#
     7.5 -# Makefile for the linux kernel.
     7.6 -#
     7.7 -
     7.8 -obj-y   := gnttab.o features.o
     7.9 -obj-$(CONFIG_PROC_FS) += xen_proc.o
    7.10 -
    7.11 -ifeq ($(ARCH),ia64)
    7.12 -obj-y   += evtchn.o
    7.13 -obj-y   += xenia64_init.o
    7.14 -ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
    7.15 -obj-$(CONFIG_NET)     += skbuff.o
    7.16 -endif
    7.17 -else
    7.18 -obj-y   += reboot.o evtchn.o fixup.o 
    7.19 -obj-$(CONFIG_SMP)     += smp.o		# setup_profiling_timer def'd in ia64
    7.20 -obj-$(CONFIG_NET)     += skbuff.o	# until networking is up on ia64
    7.21 -endif
    7.22 -obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
    7.23 -obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
     8.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Wed Jun 07 11:03:15 2006 +0100
     8.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Wed Jun 07 11:03:51 2006 +0100
     8.3 @@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p)
     8.4  
     8.5  	setup_xen_features();
     8.6  
     8.7 -	if (xen_feature(XENFEAT_auto_translated_physmap) &&
     8.8 -	    xen_start_info->shared_info < xen_start_info->nr_pages) {
     8.9 -		HYPERVISOR_shared_info =
    8.10 -			(shared_info_t *)__va(xen_start_info->shared_info);
    8.11 -		memset(empty_zero_page, 0, sizeof(empty_zero_page));
    8.12 -	}
    8.13 -
    8.14  	HYPERVISOR_vm_assist(VMASST_CMD_enable,
    8.15  			     VMASST_TYPE_writable_pagetables);
    8.16  
     9.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c	Wed Jun 07 11:03:15 2006 +0100
     9.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c	Wed Jun 07 11:03:51 2006 +0100
     9.3 @@ -488,7 +488,7 @@ static void smp_really_stop_cpu(void *du
     9.4  {
     9.5  	smp_stop_cpu(); 
     9.6  	for (;;) 
     9.7 -		asm("hlt"); 
     9.8 +		halt();
     9.9  } 
    9.10  
    9.11  void smp_send_stop(void)
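
Note: hlt is a privileged instruction and a paravirtualized guest kernel does not run in ring 0, so executing it directly would fault; halt() is the mach-xen wrapper. A hypothetical sketch of such a wrapper -- the real definition lives in the mach-xen system.h and may differ:

	/* Hypothetical PV-safe halt: yield the VCPU to the hypervisor
	 * instead of executing the privileged hlt instruction. */
	#define halt() ((void)HYPERVISOR_sched_op(SCHEDOP_block, 0))
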
    10.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Wed Jun 07 11:03:15 2006 +0100
    10.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Wed Jun 07 11:03:51 2006 +0100
    10.3 @@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig
    10.4  			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
    10.5  	}
    10.6  
    10.7 -	BUG_ON(!after_bootmem && start_pfn != table_end);
    10.8 +	if (!after_bootmem) {
    10.9 +		BUG_ON(start_pfn != table_end);
   10.10 +		/*
   10.11 +		 * Destroy the temporary mappings created above. Prevents
   10.12 +		 * overlap with modules area (if init mapping is very big).
   10.13 +		 */
   10.14 +		start = __START_KERNEL_map + (table_start << PAGE_SHIFT);
   10.15 +		end   = __START_KERNEL_map + (table_end   << PAGE_SHIFT);
   10.16 +		for (; start < end; start += PAGE_SIZE)
   10.17 +			WARN_ON(HYPERVISOR_update_va_mapping(
   10.18 +				start, __pte_ma(0), 0));
   10.19 +	}
   10.20  
   10.21  	__flush_tlb_all();
   10.22  }
   10.23 @@ -752,15 +763,11 @@ void __init paging_init(void)
   10.24  	free_area_init_node(0, NODE_DATA(0), zones,
   10.25  			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
   10.26  
   10.27 -	if (!xen_feature(XENFEAT_auto_translated_physmap) ||
   10.28 -	    xen_start_info->shared_info >= xen_start_info->nr_pages) {
   10.29 -		/* Switch to the real shared_info page, and clear the
   10.30 -		 * dummy page. */
   10.31 -		set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
   10.32 -		HYPERVISOR_shared_info =
   10.33 -			(shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   10.34 -		memset(empty_zero_page, 0, sizeof(empty_zero_page));
   10.35 -	}
   10.36 +	/* Switch to the real shared_info page, and clear the
   10.37 +	 * dummy page. */
   10.38 +	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
   10.39 +	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   10.40 +	memset(empty_zero_page, 0, sizeof(empty_zero_page));
   10.41  
   10.42  	init_mm.context.pinned = 1;
   10.43  
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig	Wed Jun 07 11:03:15 2006 +0100
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig	Wed Jun 07 11:03:51 2006 +0100
    11.3 @@ -228,4 +228,30 @@ config NO_IDLE_HZ
    11.4  	bool
    11.5  	default y
    11.6  
    11.7 +config XEN_UTIL
    11.8 +	bool
    11.9 +	default y
   11.10 +
   11.11 +config XEN_BALLOON
   11.12 +	bool
   11.13 +	default y
   11.14 +
   11.15 +config XEN_DEVMEM
   11.16 +	bool
   11.17 +	default y
   11.18 +
   11.19 +config XEN_SKBUFF
   11.20 +	bool
   11.21 +	default y
   11.22 +	depends on NET
   11.23 +
   11.24 +config XEN_REBOOT
   11.25 +	bool
   11.26 +	default y
   11.27 +
   11.28 +config XEN_SMPBOOT
   11.29 +	bool
   11.30 +	default y
   11.31 +	depends on SMP
   11.32 +
   11.33  endif
    12.1 --- a/linux-2.6-xen-sparse/drivers/xen/Makefile	Wed Jun 07 11:03:15 2006 +0100
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile	Wed Jun 07 11:03:51 2006 +0100
    12.3 @@ -1,14 +1,12 @@
    12.4 -
    12.5 -obj-y	+= util.o
    12.6 -
    12.7  obj-y	+= core/
    12.8 -obj-y	+= char/
    12.9  obj-y	+= console/
   12.10  obj-y	+= evtchn/
   12.11 -obj-y	+= balloon/
   12.12  obj-y	+= privcmd/
   12.13  obj-y	+= xenbus/
   12.14  
   12.15 +obj-$(CONFIG_XEN_UTIL)			+= util.o
   12.16 +obj-$(CONFIG_XEN_BALLOON)		+= balloon/
   12.17 +obj-$(CONFIG_XEN_DEVMEM)		+= char/
   12.18  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
   12.19  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
   12.20  obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmback/
   12.21 @@ -17,4 +15,3 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND)	+= net
   12.22  obj-$(CONFIG_XEN_BLKDEV_TAP)    	+= blktap/
   12.23  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
   12.24  obj-$(CONFIG_XEN_PCIDEV_FRONTEND)	+= pcifront/
   12.25 -
    13.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Jun 07 11:03:15 2006 +0100
    13.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Jun 07 11:03:51 2006 +0100
    13.3 @@ -452,10 +452,6 @@ int blkif_ioctl(struct inode *inode, str
    13.4  		      command, (long)argument, inode->i_rdev);
    13.5  
    13.6  	switch (command) {
    13.7 -	case HDIO_GETGEO:
    13.8 -		/* return ENOSYS to use defaults */
    13.9 -		return -ENOSYS;
   13.10 -
   13.11  	case CDROMMULTISESSION:
   13.12  		DPRINTK("FIXME: support multisession CDs later\n");
   13.13  		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
   13.14 @@ -473,6 +469,23 @@ int blkif_ioctl(struct inode *inode, str
   13.15  }
   13.16  
   13.17  
   13.18 +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
   13.19 +{
   13.20 +	/* We don't have real geometry info, but let's at least return
   13.21 +	   values consistent with the size of the device */
   13.22 +	sector_t nsect = get_capacity(bd->bd_disk);
   13.23 +	sector_t cylinders = nsect;
   13.24 +
   13.25 +	hg->heads = 0xff;
   13.26 +	hg->sectors = 0x3f;
   13.27 +	sector_div(cylinders, hg->heads * hg->sectors);
   13.28 +	hg->cylinders = cylinders;
   13.29 +	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
   13.30 +		hg->cylinders = 0xffff;
   13.31 +	return 0;
   13.32 +}
   13.33 +
   13.34 +
   13.35  /*
   13.36   * blkif_queue_request
   13.37   *
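
Note: blkif_getgeo() fabricates a C/H/S geometry from the capacity alone: heads and sectors-per-track are pinned at 255 and 63, and cylinders becomes capacity / (255 * 63), pinned to 0xffff if the 16-bit cylinder field of struct hd_geometry truncated the true value. A standalone worked example of the same arithmetic (not driver code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long nsect = 33554432ULL;	/* 16 GiB in 512-byte sectors */
		unsigned heads = 0xff, sectors = 0x3f;
		unsigned short cyls = nsect / (heads * sectors);	/* 16-bit, like hd_geometry */

		/* detect truncation of the 16-bit field, as blkif_getgeo() does */
		if ((unsigned long long)(cyls + 1) * heads * sectors < nsect)
			cyls = 0xffff;
		printf("C/H/S = %u/%u/%u\n", cyls, heads, sectors);	/* 2088/255/63 */
		return 0;
	}
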
    14.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h	Wed Jun 07 11:03:15 2006 +0100
    14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h	Wed Jun 07 11:03:51 2006 +0100
    14.3 @@ -140,6 +140,7 @@ extern int blkif_open(struct inode *inod
    14.4  extern int blkif_release(struct inode *inode, struct file *filep);
    14.5  extern int blkif_ioctl(struct inode *inode, struct file *filep,
    14.6                         unsigned command, unsigned long argument);
    14.7 +extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
    14.8  extern int blkif_check(dev_t dev);
    14.9  extern int blkif_revalidate(dev_t dev);
   14.10  extern void do_blkif_request (request_queue_t *rq);
    15.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Wed Jun 07 11:03:15 2006 +0100
    15.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Wed Jun 07 11:03:51 2006 +0100
    15.3 @@ -91,6 +91,7 @@ static struct block_device_operations xl
    15.4  	.open = blkif_open,
    15.5  	.release = blkif_release,
    15.6  	.ioctl  = blkif_ioctl,
    15.7 +	.getgeo = blkif_getgeo
    15.8  };
    15.9  
   15.10  DEFINE_SPINLOCK(blkif_io_lock);
    16.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Wed Jun 07 11:03:15 2006 +0100
    16.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Wed Jun 07 11:03:51 2006 +0100
    16.3 @@ -2,11 +2,13 @@
    16.4  # Makefile for the linux kernel.
    16.5  #
    16.6  
    16.7 -obj-y   := evtchn.o reboot.o gnttab.o features.o
    16.8 +obj-y := evtchn.o gnttab.o features.o
    16.9  
   16.10 -obj-$(CONFIG_PROC_FS)     += xen_proc.o
   16.11 -obj-$(CONFIG_NET)         += skbuff.o
   16.12 -obj-$(CONFIG_SMP)         += smpboot.o
   16.13 -obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
   16.14 -obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
   16.15 -obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
   16.16 +obj-$(CONFIG_PROC_FS)		+= xen_proc.o
   16.17 +obj-$(CONFIG_SYSFS)		+= hypervisor_sysfs.o
   16.18 +obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
   16.19 +obj-$(CONFIG_XEN_SYSFS)		+= xen_sysfs.o
   16.20 +obj-$(CONFIG_IA64)		+= xenia64_init.o
   16.21 +obj-$(CONFIG_XEN_SKBUFF)	+= skbuff.o
   16.22 +obj-$(CONFIG_XEN_REBOOT)	+= reboot.o
   16.23 +obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
    17.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Wed Jun 07 11:03:15 2006 +0100
    17.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Wed Jun 07 11:03:51 2006 +0100
    17.3 @@ -89,9 +89,8 @@ void __init prefill_possible_map(void)
    17.4  
    17.5  	for (i = 0; i < NR_CPUS; i++) {
    17.6  		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
    17.7 -		if (rc == -ENOENT)
    17.8 -			break;
    17.9 -		cpu_set(i, cpu_possible_map);
   17.10 +		if (rc >= 0)
   17.11 +			cpu_set(i, cpu_possible_map);
   17.12  	}
   17.13  }
   17.14  
   17.15 @@ -209,7 +208,7 @@ void cpu_initialize_context(unsigned int
   17.16  	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   17.17  	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   17.18  
   17.19 -	ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
   17.20 +	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
   17.21  #else /* __x86_64__ */
   17.22  	ctxt.user_regs.cs = __KERNEL_CS;
   17.23  	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
   17.24 @@ -221,7 +220,7 @@ void cpu_initialize_context(unsigned int
   17.25  	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   17.26  	ctxt.syscall_callback_eip  = (unsigned long)system_call;
   17.27  
   17.28 -	ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
   17.29 +	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
   17.30  
   17.31  	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
   17.32  #endif
    18.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c	Wed Jun 07 11:03:15 2006 +0100
    18.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c	Wed Jun 07 11:03:51 2006 +0100
    18.3 @@ -146,11 +146,13 @@ static void loopback_construct(struct ne
    18.4  	dev->hard_start_xmit = loopback_start_xmit;
    18.5  	dev->get_stats       = loopback_get_stats;
    18.6  	dev->set_multicast_list = loopback_set_multicast_list;
    18.7 +	dev->change_mtu	     = NULL; /* allow arbitrary mtu */
    18.8  
    18.9  	dev->tx_queue_len    = 0;
   18.10  
   18.11  	dev->features        = (NETIF_F_HIGHDMA |
   18.12  				NETIF_F_LLTX |
   18.13 +				NETIF_F_SG |
   18.14  				NETIF_F_IP_CSUM);
   18.15  
   18.16  	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
    19.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed Jun 07 11:03:15 2006 +0100
    19.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed Jun 07 11:03:51 2006 +0100
    19.3 @@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc
    19.4  	dc = dealloc_cons;
    19.5  	dp = dealloc_prod;
    19.6  
    19.7 +	/* Ensure we see all indexes enqueued by netif_idx_release(). */
    19.8 +	smp_rmb();
    19.9 +
   19.10  	/*
   19.11  	 * Free up any grants we have finished using
   19.12  	 */
   19.13 @@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc
   19.14  	}
   19.15  }
   19.16  
   19.17 +static void netbk_tx_err(netif_t *netif, RING_IDX end)
   19.18 +{
   19.19 +	RING_IDX cons = netif->tx.req_cons;
   19.20 +
   19.21 +	do {
   19.22 +		netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
   19.23 +		make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
   19.24 +	} while (++cons < end);
   19.25 +	netif->tx.req_cons = cons;
   19.26 +	netif_schedule_work(netif);
   19.27 +	netif_put(netif);
   19.28 +}
   19.29 +
   19.30 +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
   19.31 +				int work_to_do)
   19.32 +{
   19.33 +	netif_tx_request_t *first = txp;
   19.34 +	RING_IDX cons = netif->tx.req_cons;
   19.35 +	int frags = 1;
   19.36 +
   19.37 +	while (txp->flags & NETTXF_more_data) {
   19.38 +		if (frags >= work_to_do) {
   19.39 +			DPRINTK("Need more frags\n");
   19.40 +			return -frags;
   19.41 +		}
   19.42 +
   19.43 +		txp = RING_GET_REQUEST(&netif->tx, cons + frags);
   19.44 +		if (txp->size > first->size) {
   19.45 +			DPRINTK("Frags galore\n");
   19.46 +			return -frags;
   19.47 +		}
   19.48 +
   19.49 +		first->size -= txp->size;
   19.50 +		frags++;
   19.51 +
   19.52 +		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
   19.53 +			DPRINTK("txp->offset: %x, size: %u\n",
   19.54 +				txp->offset, txp->size);
   19.55 +			return -frags;
   19.56 +		}
   19.57 +	}
   19.58 +
   19.59 +	return frags;
   19.60 +}
   19.61 +
   19.62 +static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
   19.63 +						  struct sk_buff *skb,
   19.64 +						  gnttab_map_grant_ref_t *mop)
   19.65 +{
   19.66 +	struct skb_shared_info *shinfo = skb_shinfo(skb);
   19.67 +	skb_frag_t *frags = shinfo->frags;
   19.68 +	netif_tx_request_t *txp;
   19.69 +	unsigned long pending_idx = *((u16 *)skb->data);
   19.70 +	RING_IDX cons = netif->tx.req_cons + 1;
   19.71 +	int i, start;
   19.72 +
   19.73 +	/* Skip first skb fragment if it is on same page as header fragment. */
   19.74 +	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
   19.75 +
   19.76 +	for (i = start; i < shinfo->nr_frags; i++) {
   19.77 +		txp = RING_GET_REQUEST(&netif->tx, cons++);
   19.78 +		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
   19.79 +
   19.80 +		gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
   19.81 +				  GNTMAP_host_map | GNTMAP_readonly,
   19.82 +				  txp->gref, netif->domid);
   19.83 +
   19.84 +		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
   19.85 +		netif_get(netif);
   19.86 +		pending_tx_info[pending_idx].netif = netif;
   19.87 +		frags[i].page = (void *)pending_idx;
   19.88 +	}
   19.89 +
   19.90 +	return mop;
   19.91 +}
   19.92 +
   19.93 +static int netbk_tx_check_mop(struct sk_buff *skb,
   19.94 +			       gnttab_map_grant_ref_t **mopp)
   19.95 +{
   19.96 +	gnttab_map_grant_ref_t *mop = *mopp;
   19.97 +	int pending_idx = *((u16 *)skb->data);
   19.98 +	netif_t *netif = pending_tx_info[pending_idx].netif;
   19.99 +	netif_tx_request_t *txp;
  19.100 +	struct skb_shared_info *shinfo = skb_shinfo(skb);
  19.101 +	int nr_frags = shinfo->nr_frags;
  19.102 +	int i, err, start;
  19.103 +
  19.104 +	/* Check status of header. */
  19.105 +	err = mop->status;
  19.106 +	if (unlikely(err)) {
  19.107 +		txp = &pending_tx_info[pending_idx].req;
  19.108 +		make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
  19.109 +		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
  19.110 +		netif_put(netif);
  19.111 +	} else {
  19.112 +		set_phys_to_machine(
  19.113 +			__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
  19.114 +			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
  19.115 +		grant_tx_handle[pending_idx] = mop->handle;
  19.116 +	}
  19.117 +
  19.118 +	/* Skip first skb fragment if it is on same page as header fragment. */
  19.119 +	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
  19.120 +
  19.121 +	for (i = start; i < nr_frags; i++) {
  19.122 +		int j, newerr;
  19.123 +
  19.124 +		pending_idx = (unsigned long)shinfo->frags[i].page;
  19.125 +
  19.126 +		/* Check error status: if okay then remember grant handle. */
  19.127 +		newerr = (++mop)->status;
  19.128 +		if (likely(!newerr)) {
  19.129 +			set_phys_to_machine(
  19.130 +				__pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
  19.131 +				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
  19.132 +			grant_tx_handle[pending_idx] = mop->handle;
  19.133 +			/* Had a previous error? Invalidate this fragment. */
  19.134 +			if (unlikely(err))
  19.135 +				netif_idx_release(pending_idx);
  19.136 +			continue;
  19.137 +		}
  19.138 +
  19.139 +		/* Error on this fragment: respond to client with an error. */
  19.140 +		txp = &pending_tx_info[pending_idx].req;
  19.141 +		make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
  19.142 +		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
  19.143 +		netif_put(netif);
  19.144 +
  19.145 +		/* Not the first error? Preceding frags already invalidated. */
  19.146 +		if (err)
  19.147 +			continue;
  19.148 +
  19.149 +		/* First error: invalidate header and preceding fragments. */
  19.150 +		pending_idx = *((u16 *)skb->data);
  19.151 +		netif_idx_release(pending_idx);
  19.152 +		for (j = start; j < i; j++) {
   19.153 +			pending_idx = (unsigned long)shinfo->frags[j].page;
  19.154 +			netif_idx_release(pending_idx);
  19.155 +		}
  19.156 +
  19.157 +		/* Remember the error: invalidate all subsequent fragments. */
  19.158 +		err = newerr;
  19.159 +	}
  19.160 +
  19.161 +	*mopp = mop + 1;
  19.162 +	return err;
  19.163 +}
  19.164 +
  19.165 +static void netbk_fill_frags(struct sk_buff *skb)
  19.166 +{
  19.167 +	struct skb_shared_info *shinfo = skb_shinfo(skb);
  19.168 +	int nr_frags = shinfo->nr_frags;
  19.169 +	int i;
  19.170 +
  19.171 +	for (i = 0; i < nr_frags; i++) {
  19.172 +		skb_frag_t *frag = shinfo->frags + i;
  19.173 +		netif_tx_request_t *txp;
  19.174 +		unsigned long pending_idx;
  19.175 +
  19.176 +		pending_idx = (unsigned long)frag->page;
  19.177 +		txp = &pending_tx_info[pending_idx].req;
  19.178 +		frag->page = virt_to_page(MMAP_VADDR(pending_idx));
  19.179 +		frag->size = txp->size;
  19.180 +		frag->page_offset = txp->offset;
  19.181 +
  19.182 +		skb->len += txp->size;
  19.183 +		skb->data_len += txp->size;
  19.184 +		skb->truesize += txp->size;
  19.185 +	}
  19.186 +}
  19.187 +
  19.188  /* Called after netfront has transmitted */
  19.189  static void net_tx_action(unsigned long unused)
  19.190  {
  19.191 @@ -504,7 +678,7 @@ static void net_tx_action(unsigned long 
  19.192  		net_tx_action_dealloc();
  19.193  
  19.194  	mop = tx_map_ops;
  19.195 -	while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
  19.196 +	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
  19.197  		!list_empty(&net_schedule_list)) {
  19.198  		/* Get a netif from the list with work to do. */
  19.199  		ent = net_schedule_list.next;
  19.200 @@ -552,38 +726,44 @@ static void net_tx_action(unsigned long 
  19.201  		}
  19.202  		netif->remaining_credit -= txreq.size;
  19.203  
  19.204 -		netif->tx.req_cons++;
  19.205 -
  19.206 -		netif_schedule_work(netif);
  19.207 +		ret = netbk_count_requests(netif, &txreq, work_to_do);
  19.208 +		if (unlikely(ret < 0)) {
  19.209 +			netbk_tx_err(netif, i - ret);
  19.210 +			continue;
  19.211 +		}
  19.212 +		i += ret;
  19.213  
  19.214 -		if (unlikely(txreq.size < ETH_HLEN) || 
  19.215 -		    unlikely(txreq.size > ETH_FRAME_LEN)) {
  19.216 +		if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
  19.217 +			DPRINTK("Too many frags\n");
  19.218 +			netbk_tx_err(netif, i);
  19.219 +			continue;
  19.220 +		}
  19.221 +
  19.222 +		if (unlikely(txreq.size < ETH_HLEN)) {
  19.223  			DPRINTK("Bad packet size: %d\n", txreq.size);
  19.224 -			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
  19.225 -			netif_put(netif);
  19.226 +			netbk_tx_err(netif, i);
  19.227  			continue; 
  19.228  		}
  19.229  
  19.230  		/* No crossing a page as the payload mustn't fragment. */
  19.231 -		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
  19.232 +		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
  19.233  			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
  19.234  				txreq.offset, txreq.size, 
  19.235  				(txreq.offset &~PAGE_MASK) + txreq.size);
  19.236 -			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
  19.237 -			netif_put(netif);
  19.238 +			netbk_tx_err(netif, i);
  19.239  			continue;
  19.240  		}
  19.241  
  19.242  		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
  19.243  
  19.244 -		data_len = (txreq.size > PKT_PROT_LEN) ?
  19.245 +		data_len = (txreq.size > PKT_PROT_LEN &&
  19.246 +			    ret < MAX_SKB_FRAGS + 1) ?
  19.247  			PKT_PROT_LEN : txreq.size;
  19.248  
  19.249  		skb = alloc_skb(data_len+16, GFP_ATOMIC);
  19.250  		if (unlikely(skb == NULL)) {
  19.251  			DPRINTK("Can't allocate a skb in start_xmit.\n");
  19.252 -			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
  19.253 -			netif_put(netif);
  19.254 +			netbk_tx_err(netif, i);
  19.255  			break;
  19.256  		}
  19.257  
  19.258 @@ -600,10 +780,24 @@ static void net_tx_action(unsigned long 
  19.259  		pending_tx_info[pending_idx].netif = netif;
  19.260  		*((u16 *)skb->data) = pending_idx;
  19.261  
  19.262 +		__skb_put(skb, data_len);
  19.263 +
  19.264 +		skb_shinfo(skb)->nr_frags = ret - 1;
  19.265 +		if (data_len < txreq.size) {
  19.266 +			skb_shinfo(skb)->nr_frags++;
  19.267 +			skb_shinfo(skb)->frags[0].page =
  19.268 +				(void *)(unsigned long)pending_idx;
  19.269 +		}
  19.270 +
  19.271  		__skb_queue_tail(&tx_queue, skb);
  19.272  
  19.273  		pending_cons++;
  19.274  
  19.275 +		mop = netbk_get_requests(netif, skb, mop);
  19.276 +
  19.277 +		netif->tx.req_cons = i;
  19.278 +		netif_schedule_work(netif);
  19.279 +
  19.280  		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
  19.281  			break;
  19.282  	}
  19.283 @@ -617,75 +811,56 @@ static void net_tx_action(unsigned long 
  19.284  
  19.285  	mop = tx_map_ops;
  19.286  	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
  19.287 +		netif_tx_request_t *txp;
  19.288 +
  19.289  		pending_idx = *((u16 *)skb->data);
  19.290  		netif       = pending_tx_info[pending_idx].netif;
  19.291 -		memcpy(&txreq, &pending_tx_info[pending_idx].req,
  19.292 -		       sizeof(txreq));
  19.293 +		txp         = &pending_tx_info[pending_idx].req;
  19.294  
  19.295  		/* Check the remap error code. */
  19.296 -		if (unlikely(mop->status)) {
  19.297 +		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
  19.298  			printk(KERN_ALERT "#### netback grant fails\n");
  19.299 -			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
  19.300 -			netif_put(netif);
  19.301 +			skb_shinfo(skb)->nr_frags = 0;
  19.302  			kfree_skb(skb);
  19.303 -			mop++;
  19.304 -			pending_ring[MASK_PEND_IDX(pending_prod++)] =
  19.305 -				pending_idx;
  19.306  			continue;
  19.307  		}
  19.308 -		set_phys_to_machine(
  19.309 -			__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
  19.310 -			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
  19.311 -		grant_tx_handle[pending_idx] = mop->handle;
  19.312  
  19.313 -		data_len = (txreq.size > PKT_PROT_LEN) ?
  19.314 -			PKT_PROT_LEN : txreq.size;
  19.315 -
  19.316 -		__skb_put(skb, data_len);
  19.317 +		data_len = skb->len;
  19.318  		memcpy(skb->data, 
  19.319 -		       (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
  19.320 +		       (void *)(MMAP_VADDR(pending_idx)|txp->offset),
  19.321  		       data_len);
  19.322 -		if (data_len < txreq.size) {
  19.323 +		if (data_len < txp->size) {
  19.324  			/* Append the packet payload as a fragment. */
  19.325 -			skb_shinfo(skb)->frags[0].page        = 
  19.326 -				virt_to_page(MMAP_VADDR(pending_idx));
  19.327 -			skb_shinfo(skb)->frags[0].size        =
  19.328 -				txreq.size - data_len;
  19.329 -			skb_shinfo(skb)->frags[0].page_offset = 
  19.330 -				txreq.offset + data_len;
  19.331 -			skb_shinfo(skb)->nr_frags = 1;
  19.332 +			txp->offset += data_len;
  19.333 +			txp->size -= data_len;
  19.334  		} else {
  19.335  			/* Schedule a response immediately. */
  19.336  			netif_idx_release(pending_idx);
  19.337  		}
  19.338  
  19.339 -		skb->data_len  = txreq.size - data_len;
  19.340 -		skb->len      += skb->data_len;
  19.341 -		skb->truesize += skb->data_len;
  19.342 -
  19.343 -		skb->dev      = netif->dev;
  19.344 -		skb->protocol = eth_type_trans(skb, skb->dev);
  19.345 -
  19.346  		/*
  19.347  		 * Old frontends do not assert data_validated but we
  19.348  		 * can infer it from csum_blank so test both flags.
  19.349  		 */
  19.350 -		if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
  19.351 +		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
  19.352  			skb->ip_summed = CHECKSUM_UNNECESSARY;
  19.353  			skb->proto_data_valid = 1;
  19.354  		} else {
  19.355  			skb->ip_summed = CHECKSUM_NONE;
  19.356  			skb->proto_data_valid = 0;
  19.357  		}
  19.358 -		skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
  19.359 +		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
  19.360 +
  19.361 +		netbk_fill_frags(skb);
  19.362  
  19.363 -		netif->stats.rx_bytes += txreq.size;
  19.364 +		skb->dev      = netif->dev;
  19.365 +		skb->protocol = eth_type_trans(skb, skb->dev);
  19.366 +
  19.367 +		netif->stats.rx_bytes += skb->len;
  19.368  		netif->stats.rx_packets++;
  19.369  
  19.370  		netif_rx(skb);
  19.371  		netif->dev->last_rx = jiffies;
  19.372 -
  19.373 -		mop++;
  19.374  	}
  19.375  }
  19.376  
  19.377 @@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin
  19.378  	unsigned long flags;
  19.379  
  19.380  	spin_lock_irqsave(&_lock, flags);
  19.381 -	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
  19.382 +	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
  19.383 +	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
  19.384 +	smp_wmb();
  19.385 +	dealloc_prod++;
  19.386  	spin_unlock_irqrestore(&_lock, flags);
  19.387  
  19.388  	tasklet_schedule(&net_tx_tasklet);
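
Note: the smp_wmb() added to netif_idx_release() pairs with the smp_rmb() added to net_tx_action_dealloc(): the producer must publish the ring entry before advancing dealloc_prod, and the consumer must read dealloc_prod before the entries it covers. A self-contained C11 rendering of the same single-producer/single-consumer pattern (a sketch, not driver code):

	#include <stdatomic.h>

	#define RING_SIZE 256			/* power of two, like the dealloc ring */
	#define MASK(i)   ((i) & (RING_SIZE - 1))

	static unsigned short ring[RING_SIZE];
	static atomic_uint prod, cons;

	void produce(unsigned short idx)	/* cf. netif_idx_release() */
	{
		unsigned p = atomic_load_explicit(&prod, memory_order_relaxed);
		ring[MASK(p)] = idx;
		/* release = "insert idx /then/ incr producer" (the smp_wmb) */
		atomic_store_explicit(&prod, p + 1, memory_order_release);
	}

	int consume(unsigned short *idx)	/* cf. net_tx_action_dealloc() */
	{
		unsigned p = atomic_load_explicit(&prod, memory_order_acquire);
		unsigned c = atomic_load_explicit(&cons, memory_order_relaxed);
		if (c == p)
			return 0;		/* ring empty */
		*idx = ring[MASK(c)];		/* acquire orders this read (the smp_rmb) */
		atomic_store_explicit(&cons, c + 1, memory_order_relaxed);
		return 1;
	}
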
    20.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed Jun 07 11:03:15 2006 +0100
    20.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed Jun 07 11:03:51 2006 +0100
    20.3 @@ -69,6 +69,8 @@ static int netback_remove(struct xenbus_
    20.4  static int netback_probe(struct xenbus_device *dev,
    20.5  			 const struct xenbus_device_id *id)
    20.6  {
    20.7 +	const char *message;
    20.8 +	xenbus_transaction_t xbt;
    20.9  	int err;
   20.10  	struct backend_info *be = kzalloc(sizeof(struct backend_info),
   20.11  					  GFP_KERNEL);
   20.12 @@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
   20.13  	if (err)
   20.14  		goto fail;
   20.15  
   20.16 +	do {
   20.17 +		err = xenbus_transaction_start(&xbt);
   20.18 +		if (err) {
   20.19 +			xenbus_dev_fatal(dev, err, "starting transaction");
   20.20 +			goto fail;
   20.21 +		}
   20.22 +
   20.23 +		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
   20.24 +		if (err) {
   20.25 +			message = "writing feature-sg";
   20.26 +			goto abort_transaction;
   20.27 +		}
   20.28 +
   20.29 +		err = xenbus_transaction_end(xbt, 0);
   20.30 +	} while (err == -EAGAIN);
   20.31 +
   20.32 +	if (err) {
   20.33 +		xenbus_dev_fatal(dev, err, "completing transaction");
   20.34 +		goto fail;
   20.35 +	}
   20.36 +
   20.37  	err = xenbus_switch_state(dev, XenbusStateInitWait);
   20.38  	if (err) {
   20.39  		goto fail;
   20.40 @@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
   20.41  
   20.42  	return 0;
   20.43  
   20.44 +abort_transaction:
   20.45 +	xenbus_transaction_end(xbt, 1);
   20.46 +	xenbus_dev_fatal(dev, err, "%s", message);
   20.47  fail:
   20.48  	DPRINTK("failed");
   20.49  	netback_remove(dev);
    21.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Jun 07 11:03:15 2006 +0100
    21.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Jun 07 11:03:51 2006 +0100
    21.3 @@ -45,6 +45,7 @@
    21.4  #include <linux/bitops.h>
    21.5  #include <linux/ethtool.h>
    21.6  #include <linux/in.h>
    21.7 +#include <linux/if_ether.h>
    21.8  #include <net/sock.h>
    21.9  #include <net/pkt_sched.h>
   21.10  #include <net/arp.h>
   21.11 @@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne
   21.12  #define xennet_sysfs_delif(dev) do { } while(0)
   21.13  #endif
   21.14  
   21.15 +static inline int xennet_can_sg(struct net_device *dev)
   21.16 +{
   21.17 +	return dev->features & NETIF_F_SG;
   21.18 +}
   21.19 +
   21.20  /**
   21.21   * Entry point to this code when a new device is created.  Allocate the basic
   21.22   * structures and the ring buffers for communication with the backend, and
   21.23 @@ -307,8 +313,6 @@ again:
   21.24  		goto destroy_ring;
   21.25  	}
   21.26  
   21.27 -	xenbus_switch_state(dev, XenbusStateConnected);
   21.28 -
   21.29  	return 0;
   21.30  
   21.31   abort_transaction:
   21.32 @@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de
   21.33  		goto fail;
   21.34  
   21.35  	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
   21.36 -	network_connect(netdev);
   21.37  	info->irq = bind_evtchn_to_irqhandler(
   21.38  		info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name,
   21.39  		netdev);
   21.40 -	(void)send_fake_arp(netdev);
   21.41 -	show_device(info);
   21.42  
   21.43  	return 0;
   21.44  
   21.45 @@ -391,17 +392,26 @@ static int setup_device(struct xenbus_de
   21.46  static void backend_changed(struct xenbus_device *dev,
   21.47  			    enum xenbus_state backend_state)
   21.48  {
   21.49 +	struct netfront_info *np = dev->data;
   21.50 +	struct net_device *netdev = np->netdev;
   21.51 +
   21.52  	DPRINTK("\n");
   21.53  
   21.54  	switch (backend_state) {
   21.55  	case XenbusStateInitialising:
   21.56 -	case XenbusStateInitWait:
   21.57  	case XenbusStateInitialised:
   21.58  	case XenbusStateConnected:
   21.59  	case XenbusStateUnknown:
   21.60  	case XenbusStateClosed:
   21.61  		break;
   21.62  
   21.63 +	case XenbusStateInitWait:
   21.64 +		network_connect(netdev);
   21.65 +		xenbus_switch_state(dev, XenbusStateConnected);
   21.66 +		(void)send_fake_arp(netdev);
   21.67 +		show_device(np);
   21.68 +		break;
   21.69 +
   21.70  	case XenbusStateClosing:
   21.71  		netfront_closing(dev);
   21.72  		break;
   21.73 @@ -452,13 +462,17 @@ static int network_open(struct net_devic
   21.74  	return 0;
   21.75  }
   21.76  
   21.77 +static inline int netfront_tx_slot_available(struct netfront_info *np)
   21.78 +{
   21.79 +	return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
   21.80 +}
   21.81 +
   21.82  static inline void network_maybe_wake_tx(struct net_device *dev)
   21.83  {
   21.84  	struct netfront_info *np = netdev_priv(dev);
   21.85  
   21.86  	if (unlikely(netif_queue_stopped(dev)) &&
   21.87 -	    !RING_FULL(&np->tx) &&
   21.88 -	    !gnttab_empty_grant_references(&np->gref_tx_head) &&
   21.89 +	    netfront_tx_slot_available(np) &&
   21.90  	    likely(netif_running(dev)))
   21.91  		netif_wake_queue(dev);
   21.92  }
   21.93 @@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net
   21.94  				printk(KERN_ALERT "network_tx_buf_gc: warning "
   21.95  				       "-- grant still in use by backend "
   21.96  				       "domain.\n");
   21.97 -				break; /* bail immediately */
   21.98 +				BUG();
   21.99  			}
  21.100  			gnttab_end_foreign_access_ref(
  21.101  				np->grant_tx_ref[id], GNTMAP_readonly);
  21.102 @@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str
  21.103  	RING_PUSH_REQUESTS(&np->rx);
  21.104  }
  21.105  
  21.106 +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
  21.107 +			      struct netif_tx_request *tx)
  21.108 +{
  21.109 +	struct netfront_info *np = netdev_priv(dev);
  21.110 +	char *data = skb->data;
  21.111 +	unsigned long mfn;
  21.112 +	RING_IDX prod = np->tx.req_prod_pvt;
  21.113 +	int frags = skb_shinfo(skb)->nr_frags;
  21.114 +	unsigned int offset = offset_in_page(data);
  21.115 +	unsigned int len = skb_headlen(skb);
  21.116 +	unsigned int id;
  21.117 +	grant_ref_t ref;
  21.118 +	int i;
  21.119 +
  21.120 +	while (len > PAGE_SIZE - offset) {
  21.121 +		tx->size = PAGE_SIZE - offset;
  21.122 +		tx->flags |= NETTXF_more_data;
  21.123 +		len -= tx->size;
  21.124 +		data += tx->size;
  21.125 +		offset = 0;
  21.126 +
  21.127 +		id = get_id_from_freelist(np->tx_skbs);
  21.128 +		np->tx_skbs[id] = skb_get(skb);
  21.129 +		tx = RING_GET_REQUEST(&np->tx, prod++);
  21.130 +		tx->id = id;
  21.131 +		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
  21.132 +		BUG_ON((signed short)ref < 0);
  21.133 +
  21.134 +		mfn = virt_to_mfn(data);
  21.135 +		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
  21.136 +						mfn, GNTMAP_readonly);
  21.137 +
  21.138 +		tx->gref = np->grant_tx_ref[id] = ref;
  21.139 +		tx->offset = offset;
  21.140 +		tx->size = len;
  21.141 +		tx->flags = 0;
  21.142 +	}
  21.143 +
  21.144 +	for (i = 0; i < frags; i++) {
  21.145 +		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  21.146 +
  21.147 +		tx->flags |= NETTXF_more_data;
  21.148 +
  21.149 +		id = get_id_from_freelist(np->tx_skbs);
  21.150 +		np->tx_skbs[id] = skb_get(skb);
  21.151 +		tx = RING_GET_REQUEST(&np->tx, prod++);
  21.152 +		tx->id = id;
  21.153 +		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
  21.154 +		BUG_ON((signed short)ref < 0);
  21.155 +
  21.156 +		mfn = pfn_to_mfn(page_to_pfn(frag->page));
  21.157 +		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
  21.158 +						mfn, GNTMAP_readonly);
  21.159 +
  21.160 +		tx->gref = np->grant_tx_ref[id] = ref;
  21.161 +		tx->offset = frag->page_offset;
  21.162 +		tx->size = frag->size;
  21.163 +		tx->flags = 0;
  21.164 +	}
  21.165 +
  21.166 +	np->tx.req_prod_pvt = prod;
  21.167 +}
  21.168  
  21.169  static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  21.170  {
  21.171  	unsigned short id;
  21.172  	struct netfront_info *np = netdev_priv(dev);
  21.173  	struct netif_tx_request *tx;
  21.174 +	char *data = skb->data;
  21.175  	RING_IDX i;
  21.176  	grant_ref_t ref;
  21.177  	unsigned long mfn;
  21.178  	int notify;
  21.179 +	int frags = skb_shinfo(skb)->nr_frags;
  21.180 +	unsigned int offset = offset_in_page(data);
  21.181 +	unsigned int len = skb_headlen(skb);
  21.182  
  21.183 -	if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  21.184 -		     PAGE_SIZE)) {
  21.185 -		struct sk_buff *nskb;
  21.186 -		nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN);
  21.187 -		if (unlikely(nskb == NULL))
  21.188 -			goto drop;
  21.189 -		skb_put(nskb, skb->len);
  21.190 -		memcpy(nskb->data, skb->data, skb->len);
  21.191 -		/* Copy only the header fields we use in this driver. */
  21.192 -		nskb->dev = skb->dev;
  21.193 -		nskb->ip_summed = skb->ip_summed;
  21.194 -		nskb->proto_data_valid = skb->proto_data_valid;
  21.195 -		dev_kfree_skb(skb);
  21.196 -		skb = nskb;
  21.197 +	frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
  21.198 +	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
  21.199 +		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
  21.200 +		       frags);
  21.201 +		dump_stack();
  21.202 +		goto drop;
  21.203  	}
  21.204  
  21.205  	spin_lock_irq(&np->tx_lock);
  21.206  
  21.207 -	if (unlikely(!netif_carrier_ok(dev))) {
  21.208 +	if (unlikely(!netif_carrier_ok(dev) ||
  21.209 +		     (frags > 1 && !xennet_can_sg(dev)))) {
  21.210  		spin_unlock_irq(&np->tx_lock);
  21.211  		goto drop;
  21.212  	}
  21.213 @@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_
  21.214  	tx->id   = id;
  21.215  	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
  21.216  	BUG_ON((signed short)ref < 0);
  21.217 -	mfn = virt_to_mfn(skb->data);
  21.218 +	mfn = virt_to_mfn(data);
  21.219  	gnttab_grant_foreign_access_ref(
  21.220  		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
  21.221  	tx->gref = np->grant_tx_ref[id] = ref;
  21.222 -	tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
  21.223 -	tx->size = skb->len;
  21.224 +	tx->offset = offset;
  21.225 +	tx->size = len;
  21.226  
  21.227  	tx->flags = 0;
  21.228  	if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
  21.229 @@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_
  21.230  		tx->flags |= NETTXF_data_validated;
  21.231  
  21.232  	np->tx.req_prod_pvt = i + 1;
  21.233 +
  21.234 +	xennet_make_frags(skb, dev, tx);
  21.235 +	tx->size = skb->len;
  21.236 +
  21.237  	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
  21.238  	if (notify)
  21.239  		notify_remote_via_irq(np->irq);
  21.240  
  21.241  	network_tx_buf_gc(dev);
  21.242  
  21.243 -	if (RING_FULL(&np->tx) ||
  21.244 -	    gnttab_empty_grant_references(&np->gref_tx_head))
  21.245 +	if (!netfront_tx_slot_available(np))
  21.246  		netif_stop_queue(dev);
  21.247  
  21.248  	spin_unlock_irq(&np->tx_lock);
  21.249 @@ -963,6 +1039,38 @@ static struct net_device_stats *network_
  21.250  	return &np->stats;
  21.251  }
  21.252  
  21.253 +static int xennet_change_mtu(struct net_device *dev, int mtu)
  21.254 +{
  21.255 +	int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
  21.256 +
  21.257 +	if (mtu > max)
  21.258 +		return -EINVAL;
  21.259 +	dev->mtu = mtu;
  21.260 +	return 0;
  21.261 +}
  21.262 +
  21.263 +static int xennet_set_sg(struct net_device *dev, u32 data)
  21.264 +{
  21.265 +	if (data) {
  21.266 +		struct netfront_info *np = netdev_priv(dev);
  21.267 +		int val;
  21.268 +
  21.269 +		if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg",
  21.270 +				 "%d", &val) < 0)
  21.271 +			val = 0;
  21.272 +		if (!val)
  21.273 +			return -ENOSYS;
  21.274 +	} else if (dev->mtu > ETH_DATA_LEN)
  21.275 +		dev->mtu = ETH_DATA_LEN;
  21.276 +
  21.277 +	return ethtool_op_set_sg(dev, data);
  21.278 +}
  21.279 +
  21.280 +static void xennet_set_features(struct net_device *dev)
  21.281 +{
  21.282 +	xennet_set_sg(dev, 1);
  21.283 +}
  21.284 +
  21.285  static void network_connect(struct net_device *dev)
  21.286  {
  21.287  	struct netfront_info *np;
  21.288 @@ -970,6 +1078,8 @@ static void network_connect(struct net_d
  21.289  	struct netif_tx_request *tx;
  21.290  	struct sk_buff *skb;
  21.291  
  21.292 +	xennet_set_features(dev);
  21.293 +
  21.294  	np = netdev_priv(dev);
  21.295  	spin_lock_irq(&np->tx_lock);
  21.296  	spin_lock(&np->rx_lock);
  21.297 @@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo
  21.298  {
  21.299  	.get_tx_csum = ethtool_op_get_tx_csum,
  21.300  	.set_tx_csum = ethtool_op_set_tx_csum,
  21.301 +	.get_sg = ethtool_op_get_sg,
  21.302 +	.set_sg = xennet_set_sg,
  21.303  };
  21.304  
  21.305  #ifdef CONFIG_SYSFS
  21.306 @@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre
  21.307  	netdev->poll            = netif_poll;
  21.308  	netdev->set_multicast_list = network_set_multicast_list;
  21.309  	netdev->uninit          = netif_uninit;
  21.310 +	netdev->change_mtu	= xennet_change_mtu;
  21.311  	netdev->weight          = 64;
  21.312  	netdev->features        = NETIF_F_IP_CSUM;
  21.313  
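
Note: with scatter-gather negotiated, network_start_xmit() must reserve one tx-ring request per page the packet touches, and xennet_change_mtu() lifts the MTU ceiling to 65535 - ETH_HLEN = 65521 bytes. A standalone worked example of the fragment estimate (MAX_SKB_FRAGS shown with an illustrative value; the real one is kernel-dependent):

	#include <stdio.h>

	#define PAGE_SIZE     4096
	#define MAX_SKB_FRAGS 18	/* illustrative only */

	int main(void)
	{
		unsigned offset = 200, len = 6000, nr_frags = 3;
		/* pages spanned by the linear area: ceil((offset + len) / PAGE_SIZE) */
		unsigned frags = nr_frags + (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;

		printf("tx requests needed: %u (limit %u)\n", frags, MAX_SKB_FRAGS + 1);
		return 0;	/* prints 5 (limit 19) */
	}
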
    22.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Jun 07 11:03:15 2006 +0100
    22.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Jun 07 11:03:51 2006 +0100
    22.3 @@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i
    22.4  		__asm__ __volatile__ (
    22.5  			"pushl %%ebx; pushl %%ecx; pushl %%edx; "
    22.6  			"pushl %%esi; pushl %%edi; "
    22.7 -			"movl  4(%%eax),%%ebx ;"
    22.8 -			"movl  8(%%eax),%%ecx ;"
    22.9 -			"movl 12(%%eax),%%edx ;"
   22.10 -			"movl 16(%%eax),%%esi ;"
   22.11 -			"movl 20(%%eax),%%edi ;"
   22.12 +			"movl  8(%%eax),%%ebx ;"
   22.13 +			"movl 16(%%eax),%%ecx ;"
   22.14 +			"movl 24(%%eax),%%edx ;"
   22.15 +			"movl 32(%%eax),%%esi ;"
   22.16 +			"movl 40(%%eax),%%edi ;"
   22.17  			"movl   (%%eax),%%eax ;"
   22.18  			"shll $5,%%eax ;"
   22.19  			"addl $hypercall_page,%%eax ;"
   22.20 @@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i
   22.21  	case IOCTL_PRIVCMD_MMAPBATCH: {
   22.22  		privcmd_mmapbatch_t m;
   22.23  		struct vm_area_struct *vma = NULL;
   22.24 -		unsigned long __user *p;
   22.25 +		xen_pfn_t __user *p;
   22.26  		unsigned long addr, mfn; 
   22.27  		int i;
   22.28  
   22.29 @@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i
   22.30  	batch_err:
   22.31  		printk("batch_err ret=%d vma=%p addr=%lx "
   22.32  		       "num=%d arr=%p %lx-%lx\n", 
   22.33 -		       ret, vma, m.addr, m.num, m.arr,
   22.34 +		       ret, vma, (unsigned long)m.addr, m.num, m.arr,
   22.35  		       vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
   22.36  		break;
   22.37  	}
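
Note: the new movl offsets follow from privcmd_hypercall_t growing __u64 fields (see the privcmd.h hunk below): arg[i] now sits at byte 8 + 8*i, and the 32-bit stub loads only the low word of each argument. A compile-time check of that layout, with uint64_t standing in for __u64:

	#include <stddef.h>
	#include <stdint.h>

	struct privcmd_hypercall {
		uint64_t op;		/* offset 0 */
		uint64_t arg[5];	/* offsets 8, 16, 24, 32, 40 */
	};

	/* hence: movl 8(%eax),%ebx; movl 16(%eax),%ecx; ... movl 40(%eax),%edi */
	_Static_assert(offsetof(struct privcmd_hypercall, arg[0]) == 8,  "arg[0]");
	_Static_assert(offsetof(struct privcmd_hypercall, arg[4]) == 40, "arg[4]");
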
    23.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Wed Jun 07 11:03:15 2006 +0100
    23.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Wed Jun 07 11:03:51 2006 +0100
    23.3 @@ -116,10 +116,12 @@ extern struct task_struct * FASTCALL(__s
    23.4  	__asm__ ( \
    23.5  		"movl %%cr3,%0\n\t" \
    23.6  		:"=r" (__dummy)); \
    23.7 -	machine_to_phys(__dummy); \
    23.8 +	__dummy = xen_cr3_to_pfn(__dummy); \
    23.9 +	mfn_to_pfn(__dummy) << PAGE_SHIFT; \
   23.10  })
   23.11  #define write_cr3(x) ({						\
   23.12 -	maddr_t __dummy = phys_to_machine(x);			\
   23.13 +	unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT);	\
   23.14 +	__dummy = xen_pfn_to_cr3(__dummy);			\
   23.15  	__asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));	\
   23.16  })
   23.17  
    24.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Wed Jun 07 11:03:15 2006 +0100
    24.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Wed Jun 07 11:03:51 2006 +0100
    24.3 @@ -61,13 +61,6 @@ static void __init machine_specific_arch
    24.4  		.address = { __KERNEL_CS, (unsigned long)nmi },
    24.5  	};
    24.6  
    24.7 -	if (xen_feature(XENFEAT_auto_translated_physmap) &&
    24.8 -	    xen_start_info->shared_info < xen_start_info->nr_pages) {
    24.9 -		HYPERVISOR_shared_info =
   24.10 -			(shared_info_t *)__va(xen_start_info->shared_info);
   24.11 -		memset(empty_zero_page, 0, sizeof(empty_zero_page));
   24.12 -	}
   24.13 -
   24.14  	ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
   24.15  	if (ret == 0)
   24.16  		ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
    25.1 --- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h	Wed Jun 07 11:03:15 2006 +0100
    25.2 +++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h	Wed Jun 07 11:03:51 2006 +0100
    25.3 @@ -33,20 +33,22 @@
    25.4  #ifndef __LINUX_PUBLIC_PRIVCMD_H__
    25.5  #define __LINUX_PUBLIC_PRIVCMD_H__
    25.6  
    25.7 +#include <linux/types.h>
    25.8 +
    25.9  #ifndef __user
   25.10  #define __user
   25.11  #endif
   25.12  
   25.13  typedef struct privcmd_hypercall
   25.14  {
   25.15 -	unsigned long op;
   25.16 -	unsigned long arg[5];
   25.17 +	__u64 op;
   25.18 +	__u64 arg[5];
   25.19  } privcmd_hypercall_t;
   25.20  
   25.21  typedef struct privcmd_mmap_entry {
   25.22 -	unsigned long va;
   25.23 -	unsigned long mfn;
   25.24 -	unsigned long npages;
   25.25 +	__u64 va;
   25.26 +	__u64 mfn;
   25.27 +	__u64 npages;
   25.28  } privcmd_mmap_entry_t; 
   25.29  
   25.30  typedef struct privcmd_mmap {
   25.31 @@ -58,8 +60,8 @@ typedef struct privcmd_mmap {
   25.32  typedef struct privcmd_mmapbatch {
   25.33  	int num;     /* number of pages to populate */
   25.34  	domid_t dom; /* target domain */
   25.35 -	unsigned long addr;  /* virtual address */
   25.36 -	unsigned long __user *arr; /* array of mfns - top nibble set on err */
   25.37 +	__u64 addr;  /* virtual address */
   25.38 +	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
   25.39  } privcmd_mmapbatch_t; 
   25.40  
   25.41  /*
    26.1 --- a/tools/debugger/libxendebug/xendebug.c	Wed Jun 07 11:03:15 2006 +0100
    26.2 +++ b/tools/debugger/libxendebug/xendebug.c	Wed Jun 07 11:03:51 2006 +0100
    26.3 @@ -57,7 +57,7 @@ typedef struct domain_context           
    26.4      vcpu_guest_context_t context[MAX_VIRT_CPUS];
    26.5  
    26.6      long            total_pages;
    26.7 -    unsigned long  *page_array;
    26.8 +    xen_pfn_t      *page_array;
    26.9  
   26.10      unsigned long   cr3_phys[MAX_VIRT_CPUS];
   26.11      unsigned long  *cr3_virt[MAX_VIRT_CPUS];
   26.12 @@ -346,8 +346,9 @@ xendebug_memory_page (domain_context_p c
   26.13          ctxt->cr3_phys[vcpu] = vcpu_ctxt->ctrlreg[3];
   26.14          if ( ctxt->cr3_virt[vcpu] )
   26.15              munmap(ctxt->cr3_virt[vcpu], PAGE_SIZE);
   26.16 -        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
   26.17 -                    PAGE_SIZE, PROT_READ, ctxt->cr3_phys[vcpu] >> PAGE_SHIFT);
   26.18 +        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(
   26.19 +            xc_handle, ctxt->domid, PAGE_SIZE, PROT_READ,
   26.20 +            xen_cr3_to_pfn(ctxt->cr3_phys[vcpu]));
   26.21          if ( ctxt->cr3_virt[vcpu] == NULL )
   26.22              return 0;
   26.23      } 
    27.1 --- a/tools/firmware/hvmloader/Makefile	Wed Jun 07 11:03:15 2006 +0100
    27.2 +++ b/tools/firmware/hvmloader/Makefile	Wed Jun 07 11:03:51 2006 +0100
    27.3 @@ -51,8 +51,8 @@ hvmloader: roms.h hvmloader.c acpi_madt.
    27.4  	$(OBJCOPY) hvmloader.tmp hvmloader
    27.5  	rm -f hvmloader.tmp
    27.6  
    27.7 -roms.h:	../rombios/BIOS-bochs-8-processors ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
    27.8 -	sh ./mkhex rombios ../rombios/BIOS-bochs-8-processors > roms.h
    27.9 +roms.h:	../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
   27.10 +	sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
   27.11  	sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
   27.12  	sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
   27.13  	sh ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h
    28.1 --- a/tools/firmware/rombios/Makefile	Wed Jun 07 11:03:15 2006 +0100
    28.2 +++ b/tools/firmware/rombios/Makefile	Wed Jun 07 11:03:51 2006 +0100
    28.3 @@ -1,7 +1,7 @@
    28.4 -#BIOS_BUILDS = BIOS-bochs-latest
    28.5 +BIOS_BUILDS = BIOS-bochs-latest
    28.6  #BIOS_BUILDS += BIOS-bochs-2-processors
    28.7  #BIOS_BUILDS += BIOS-bochs-4-processors
    28.8 -BIOS_BUILDS += BIOS-bochs-8-processors
    28.9 +#BIOS_BUILDS += BIOS-bochs-8-processors
   28.10  
   28.11  .PHONY: all
   28.12  all: bios
    29.1 --- a/tools/firmware/vmxassist/vm86.c	Wed Jun 07 11:03:15 2006 +0100
    29.2 +++ b/tools/firmware/vmxassist/vm86.c	Wed Jun 07 11:03:51 2006 +0100
    29.3 @@ -37,6 +37,8 @@
    29.4  static unsigned prev_eip = 0;
    29.5  enum vm86_mode mode = 0;
    29.6  
    29.7 +static struct regs saved_rm_regs;
    29.8 +
    29.9  #ifdef DEBUG
   29.10  int traceset = 0;
   29.11  
   29.12 @@ -795,6 +797,8 @@ protected_mode(struct regs *regs)
   29.13  	oldctx.esp = regs->uesp;
   29.14  	oldctx.eflags = regs->eflags;
   29.15  
   29.16 +	memset(&saved_rm_regs, 0, sizeof(struct regs));
   29.17 +
   29.18  	/* reload all segment registers */
   29.19  	if (!load_seg(regs->cs, &oldctx.cs_base,
   29.20  				&oldctx.cs_limit, &oldctx.cs_arbytes))
   29.21 @@ -808,6 +812,7 @@ protected_mode(struct regs *regs)
   29.22  		load_seg(0, &oldctx.es_base,
   29.23  			    &oldctx.es_limit, &oldctx.es_arbytes);
   29.24  		oldctx.es_sel = 0;
   29.25 +		saved_rm_regs.ves = regs->ves;
   29.26  	}
   29.27  
   29.28  	if (load_seg(regs->uss, &oldctx.ss_base,
   29.29 @@ -817,6 +822,7 @@ protected_mode(struct regs *regs)
   29.30  		load_seg(0, &oldctx.ss_base,
   29.31  			    &oldctx.ss_limit, &oldctx.ss_arbytes);
   29.32  		oldctx.ss_sel = 0;
   29.33 +		saved_rm_regs.uss = regs->uss;
   29.34  	}
   29.35  
   29.36  	if (load_seg(regs->vds, &oldctx.ds_base,
   29.37 @@ -826,6 +832,7 @@ protected_mode(struct regs *regs)
   29.38  		load_seg(0, &oldctx.ds_base,
   29.39  			    &oldctx.ds_limit, &oldctx.ds_arbytes);
   29.40  		oldctx.ds_sel = 0;
   29.41 +		saved_rm_regs.vds = regs->vds;
   29.42  	}
   29.43  
   29.44  	if (load_seg(regs->vfs, &oldctx.fs_base,
   29.45 @@ -835,6 +842,7 @@ protected_mode(struct regs *regs)
   29.46  		load_seg(0, &oldctx.fs_base,
   29.47  			    &oldctx.fs_limit, &oldctx.fs_arbytes);
   29.48  		oldctx.fs_sel = 0;
   29.49 +		saved_rm_regs.vfs = regs->vfs;
   29.50  	}
   29.51  
   29.52  	if (load_seg(regs->vgs, &oldctx.gs_base,
   29.53 @@ -844,6 +852,7 @@ protected_mode(struct regs *regs)
   29.54  		load_seg(0, &oldctx.gs_base,
   29.55  			    &oldctx.gs_limit, &oldctx.gs_arbytes);
   29.56  		oldctx.gs_sel = 0;
   29.57 +		saved_rm_regs.vgs = regs->vgs;
   29.58  	}
   29.59  
   29.60  	/* initialize jump environment to warp back to protected mode */
   29.61 @@ -880,16 +889,22 @@ real_mode(struct regs *regs)
   29.62  		if (regs->uss >= HIGHMEM)
   29.63  			panic("%%ss 0x%lx higher than 1MB", regs->uss);
   29.64  		regs->uss = address(regs, regs->uss, 0) >> 4;
   29.65 +	} else {
   29.66 +	  regs->uss = saved_rm_regs.uss;
   29.67  	}
   29.68  	if (regs->vds != 0) {
   29.69  		if (regs->vds >= HIGHMEM)
   29.70  			panic("%%ds 0x%lx higher than 1MB", regs->vds);
   29.71  		regs->vds = address(regs, regs->vds, 0) >> 4;
   29.72 +	} else {
   29.73 +		regs->vds = saved_rm_regs.vds;
   29.74  	}
   29.75  	if (regs->ves != 0) {
   29.76  		if (regs->ves >= HIGHMEM)
   29.77  			panic("%%es 0x%lx higher than 1MB", regs->ves);
   29.78  		regs->ves = address(regs, regs->ves, 0) >> 4;
   29.79 +	} else {
   29.80 +		regs->ves = saved_rm_regs.ves;
   29.81  	}
   29.82  
   29.83  	/* this should get us into 16-bit mode */
   29.84 @@ -982,6 +997,39 @@ jmpl(struct regs *regs, int prefix)
   29.85  }
   29.86  
   29.87  static void
   29.88 +jmpl_indirect(struct regs *regs, int prefix, unsigned modrm)
   29.89 +{
   29.90 +	unsigned n = regs->eip;
   29.91 +	unsigned cs, eip;
   29.92 +	unsigned addr;
   29.93 +
   29.94 +	addr  = operand(prefix, regs, modrm);
   29.95 +
   29.96 +	if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
   29.97 +		eip = (prefix & DATA32) ? read32(addr) : read16(addr);
   29.98 +		addr += (prefix & DATA32) ? 4 : 2;
   29.99 +		cs = read16(addr);
  29.100 +
  29.101 +		TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
  29.102 +
  29.103 +		regs->cs = cs;
  29.104 +		regs->eip = eip;
  29.105 +		set_mode(regs, VM86_PROTECTED);
  29.106 +	} else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
  29.107 +		eip = (prefix & DATA32) ? read32(addr) : read16(addr);
  29.108 +		addr += (prefix & DATA32) ? 4 : 2;
  29.109 +		cs = read16(addr);
  29.110 +
  29.111 +		TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
  29.112 +
  29.113 +		regs->cs = cs;
  29.114 +		regs->eip = eip;
  29.115 +		set_mode(regs, VM86_REAL);
  29.116 +	} else
  29.117 +		panic("jmpl");
  29.118 +}
  29.119 +
  29.120 +static void
  29.121  retl(struct regs *regs, int prefix)
  29.122  {
  29.123  	unsigned cs, eip;
  29.124 @@ -1306,6 +1354,23 @@ opcode(struct regs *regs)
  29.125  			}
  29.126  			goto invalid;
  29.127  
  29.128 +		case 0xFF: /* jmpl (indirect) */
  29.129 +			if ((mode == VM86_REAL_TO_PROTECTED) ||
  29.130 +			    (mode == VM86_PROTECTED_TO_REAL)) {
  29.131 +				unsigned modrm = fetch8(regs);
  29.132 +
  29.133 +				switch ((modrm >> 3) & 7) {
  29.134 +				case 5:
  29.135 +					jmpl_indirect(regs, prefix, modrm);
  29.136 +					return OPC_INVALID;
  29.137 +
  29.138 +				default:
  29.139 +					break;
  29.140 +				}
  29.141 +
  29.142 +			}
  29.143 +			goto invalid;
  29.144 +
  29.145  		case 0xEB: /* short jump */
  29.146  			if ((mode == VM86_REAL_TO_PROTECTED) ||
  29.147  			    (mode == VM86_PROTECTED_TO_REAL)) {
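
Note on the 0xFF handler above: both mode branches of jmpl_indirect decode the
operand identically and differ only in the mode passed to set_mode; the
dispatch in opcode() keys on the ModRM reg field, where /5 selects the far
indirect jump. A minimal standalone sketch of that decode rule (hypothetical
helper, not vmxassist code):

    /* Sketch: 0xFF opcode-group decode; reg field 5 is JMP FAR m16:16/32. */
    #include <stdio.h>

    static const char *ff_group5(unsigned char modrm)
    {
        switch ((modrm >> 3) & 7) {
        case 2: return "CALL near indirect";
        case 3: return "CALL far indirect";
        case 4: return "JMP near indirect";
        case 5: return "JMP far indirect";   /* the case handled above */
        default: return "other /digit";
        }
    }

    int main(void)
    {
        printf("%s\n", ff_group5(0x2d));  /* mod=00 reg=101 rm=101 */
        return 0;
    }
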
    30.1 --- a/tools/ioemu/hw/cirrus_vga.c	Wed Jun 07 11:03:15 2006 +0100
    30.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Wed Jun 07 11:03:51 2006 +0100
    30.3 @@ -2460,10 +2460,9 @@ static CPUWriteMemoryFunc *cirrus_linear
    30.4  };
    30.5  
    30.6  extern FILE *logfile;
    30.7 -#if defined(__i386__) || defined (__x86_64__)
    30.8  static void * set_vram_mapping(unsigned long begin, unsigned long end)
    30.9  {
   30.10 -    unsigned long * extent_start = NULL;
   30.11 +    xen_pfn_t *extent_start = NULL;
   30.12      unsigned long nr_extents;
   30.13      void *vram_pointer = NULL;
   30.14      int i;
   30.15 @@ -2474,14 +2473,14 @@ static void * set_vram_mapping(unsigned 
   30.16      end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
   30.17      nr_extents = (end - begin) >> TARGET_PAGE_BITS;
   30.18  
   30.19 -    extent_start = malloc(sizeof(unsigned long) * nr_extents );
   30.20 +    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
   30.21      if (extent_start == NULL)
   30.22      {
   30.23          fprintf(stderr, "Failed malloc on set_vram_mapping\n");
   30.24          return NULL;
   30.25      }
   30.26  
   30.27 -    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
   30.28 +    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
   30.29  
   30.30      for (i = 0; i < nr_extents; i++)
   30.31      {
   30.32 @@ -2509,7 +2508,7 @@ static void * set_vram_mapping(unsigned 
   30.33  
   30.34  static int unset_vram_mapping(unsigned long begin, unsigned long end)
   30.35  {
   30.36 -    unsigned long * extent_start = NULL;
   30.37 +    xen_pfn_t *extent_start = NULL;
   30.38      unsigned long nr_extents;
   30.39      int i;
   30.40  
   30.41 @@ -2520,7 +2519,7 @@ static int unset_vram_mapping(unsigned l
   30.42      end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
   30.43      nr_extents = (end - begin) >> TARGET_PAGE_BITS;
   30.44  
   30.45 -    extent_start = malloc(sizeof(unsigned long) * nr_extents );
   30.46 +    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
   30.47  
   30.48      if (extent_start == NULL)
   30.49      {
   30.50 @@ -2528,7 +2527,7 @@ static int unset_vram_mapping(unsigned l
   30.51          return -1;
   30.52      }
   30.53  
   30.54 -    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
   30.55 +    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
   30.56  
   30.57      for (i = 0; i < nr_extents; i++)
   30.58          extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS;
   30.59 @@ -2540,10 +2539,6 @@ static int unset_vram_mapping(unsigned l
   30.60      return 0;
   30.61  }
   30.62  
   30.63 -#elif defined(__ia64__)
   30.64 -static void * set_vram_mapping(unsigned long addr, unsigned long end) {}
   30.65 -static int unset_vram_mapping(unsigned long addr, unsigned long end) {}
   30.66 -#endif
   30.67  extern int vga_accelerate;
   30.68  
   30.69  /* Compute the memory access functions */
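
Note: the unsigned long -> xen_pfn_t conversion in this file (and in the libxc
hunks below) matters because the hypercall ABI fixes the frame-number width
per architecture, independent of userland's unsigned long. A toy sketch of the
sizing rule, under the assumption (for illustration only) that xen_pfn_t is
wider than unsigned long on the affected port:

    /* Sketch: size the extent array with the interface type, so the
     * buffer matches what the hypervisor will read and write. */
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    typedef uint64_t xen_pfn_t;  /* assumption for illustration only */

    static xen_pfn_t *alloc_extents(unsigned long nr_extents)
    {
        xen_pfn_t *extent_start = malloc(nr_extents * sizeof(xen_pfn_t));
        if (extent_start != NULL)
            memset(extent_start, 0, nr_extents * sizeof(xen_pfn_t));
        return extent_start;
    }
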
    31.1 --- a/tools/ioemu/hw/pc.c	Wed Jun 07 11:03:15 2006 +0100
    31.2 +++ b/tools/ioemu/hw/pc.c	Wed Jun 07 11:03:51 2006 +0100
    31.3 @@ -537,8 +537,11 @@ void pc_init(uint64_t ram_size, int vga_
    31.4      for(i = 0; i < MAX_SERIAL_PORTS; i++) {
    31.5          if (serial_hds[i]) {
    31.6              sp = serial_init(serial_io[i], serial_irq[i], serial_hds[i]);
    31.7 -            if (i == SUMMA_PORT)
    31.8 +            if (i == serial_summa_port) {
    31.9  		summa_init(sp, serial_hds[i]);
   31.10 +		fprintf(stderr, "Serial port %d (COM%d) initialized for Summagraphics\n",
   31.11 +			i, i+1);
   31.12 +	    }
   31.13          }
   31.14      }
   31.15  
    32.1 --- a/tools/ioemu/hw/vga.c	Wed Jun 07 11:03:15 2006 +0100
    32.2 +++ b/tools/ioemu/hw/vga.c	Wed Jun 07 11:03:51 2006 +0100
    32.3 @@ -1995,6 +1995,7 @@ void vga_common_init(VGAState *s, Displa
    32.4      s->get_resolution = vga_get_resolution;
    32.5      /* XXX: currently needed for display */
    32.6      vga_state = s;
    32.7 +    vga_bios_init(s);
    32.8  }
    32.9  
   32.10  
   32.11 @@ -2082,7 +2083,6 @@ int vga_initialize(PCIBus *bus, DisplayS
   32.12  #endif
   32.13      }
   32.14  
   32.15 -    vga_bios_init(s);
   32.16      return 0;
   32.17  }
   32.18  
    33.1 --- a/tools/ioemu/vl.c	Wed Jun 07 11:03:15 2006 +0100
    33.2 +++ b/tools/ioemu/vl.c	Wed Jun 07 11:03:51 2006 +0100
    33.3 @@ -146,6 +146,7 @@ int full_screen = 0;
    33.4  int repeat_key = 1;
    33.5  TextConsole *vga_console;
    33.6  CharDriverState *serial_hds[MAX_SERIAL_PORTS];
    33.7 +int serial_summa_port = -1;
    33.8  int xc_handle;
    33.9  time_t timeoffset = 0;
   33.10  
   33.11 @@ -2457,7 +2458,7 @@ int unset_mm_mapping(int xc_handle,
   33.12                       uint32_t domid,
   33.13                       unsigned long nr_pages,
   33.14                       unsigned int address_bits,
   33.15 -                     unsigned long *extent_start)
   33.16 +                     xen_pfn_t *extent_start)
   33.17  {
   33.18      int err = 0;
   33.19      xc_dominfo_t info;
   33.20 @@ -2490,7 +2491,7 @@ int set_mm_mapping(int xc_handle,
   33.21                      uint32_t domid,
   33.22                      unsigned long nr_pages,
   33.23                      unsigned int address_bits,
   33.24 -                    unsigned long *extent_start)
   33.25 +                    xen_pfn_t *extent_start)
   33.26  {
   33.27      xc_dominfo_t info;
   33.28      int err = 0;
   33.29 @@ -2498,7 +2499,7 @@ int set_mm_mapping(int xc_handle,
   33.30      xc_domain_getinfo(xc_handle, domid, 1, &info);
   33.31  
   33.32      if ( xc_domain_setmaxmem(xc_handle, domid,
   33.33 -                             (info.nr_pages + nr_pages) * PAGE_SIZE/1024) != 0)
   33.34 +                             info.max_memkb + nr_pages * PAGE_SIZE/1024) != 0)
   33.35      {
   33.36          fprintf(logfile, "set maxmem returned error %d\n", errno);
   33.37          return -1;
   33.38 @@ -2556,7 +2557,8 @@ int main(int argc, char **argv)
   33.39      int serial_device_index;
   33.40      char qemu_dm_logfilename[64];
   33.41      const char *loadvm = NULL;
   33.42 -    unsigned long nr_pages, *page_array;
   33.43 +    unsigned long nr_pages;
   33.44 +    xen_pfn_t *page_array;
   33.45      extern void *shared_page;
   33.46  
   33.47  #if !defined(CONFIG_SOFTMMU)
   33.48 @@ -2588,8 +2590,8 @@ int main(int argc, char **argv)
   33.49      pstrcpy(monitor_device, sizeof(monitor_device), "vc");
   33.50  
   33.51      pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
   33.52 -    pstrcpy(serial_devices[1], sizeof(serial_devices[1]), "null");
   33.53 -    for(i = 2; i < MAX_SERIAL_PORTS; i++)
   33.54 +    serial_summa_port = -1;
   33.55 +    for(i = 1; i < MAX_SERIAL_PORTS; i++)
   33.56          serial_devices[i][0] = '\0';
   33.57      serial_device_index = 0;
   33.58  
   33.59 @@ -3022,8 +3024,8 @@ int main(int argc, char **argv)
   33.60  
   33.61      xc_handle = xc_interface_open();
   33.62  
   33.63 -    if ( (page_array = (unsigned long *)
   33.64 -                        malloc(nr_pages * sizeof(unsigned long))) == NULL)
   33.65 +    if ( (page_array = (xen_pfn_t *)
   33.66 +                        malloc(nr_pages * sizeof(xen_pfn_t))) == NULL)
   33.67      {
   33.68          fprintf(logfile, "malloc returned error %d\n", errno);
   33.69          exit(-1);
   33.70 @@ -3078,8 +3080,8 @@ int main(int argc, char **argv)
   33.71                                         page_array[0]);
   33.72  #endif
   33.73  
   33.74 -    fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1),
   33.75 -           (page_array[nr_pages - 1]));
   33.76 +    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1),
   33.77 +           (uint64_t)(page_array[nr_pages - 1]));
   33.78  
   33.79      /* we always create the cdrom drive, even if no disk is there */
   33.80      bdrv_init();
   33.81 @@ -3173,6 +3175,20 @@ int main(int argc, char **argv)
   33.82      }
   33.83      monitor_init(monitor_hd, !nographic);
   33.84  
   33.85 +    /* Find which port should be the Summagraphics port */
   33.86 +    /* It's the first unspecified serial line. Note that COM1 is set */
   33.87 +    /* by default, so the Summagraphics port would be COM2 or higher */
   33.88 +
   33.89 +    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
   33.90 +        if (serial_devices[i][0] != '\0')
   33.91 +            continue;
   33.92 +        serial_summa_port = i;
   33.93 +        pstrcpy(serial_devices[serial_summa_port], sizeof(serial_devices[0]), "null");
   33.94 +        break;
   33.95 +    }
   33.96 +
   33.97 +    /* Now, open the ports */
   33.98 +
   33.99      for(i = 0; i < MAX_SERIAL_PORTS; i++) {
  33.100          if (serial_devices[i][0] != '\0') {
  33.101              serial_hds[i] = qemu_chr_open(serial_devices[i]);
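
Note: the xc_domain_setmaxmem change above is a behavioural fix, not just
retyping. info.nr_pages is the current allocation, which drops when the guest
balloons down, while info.max_memkb is the configured ceiling. A toy
calculation with made-up numbers showing how the old formula loses headroom:

    /* Sketch with illustrative numbers: a 512MB guest ballooned to 256MB. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long max_memkb  = 512 * 1024;  /* configured max: 512MB  */
        unsigned long nr_pages   = 64 * 1024;   /* currently held: 256MB  */
        unsigned long page_size  = 4096;
        unsigned long vram_pages = 2048;        /* 8MB of VGA RAM to add  */

        /* old formula: derived from the current allocation */
        printf("old: %lu kB\n", (nr_pages + vram_pages) * page_size / 1024);
        /* new formula: derived from the configured maximum */
        printf("new: %lu kB\n", max_memkb + vram_pages * page_size / 1024);
        return 0;
    }
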
    34.1 --- a/tools/ioemu/vl.h	Wed Jun 07 11:03:15 2006 +0100
    34.2 +++ b/tools/ioemu/vl.h	Wed Jun 07 11:03:51 2006 +0100
    34.3 @@ -238,9 +238,9 @@ void console_select(unsigned int index);
    34.4  /* serial ports */
    34.5  
    34.6  #define MAX_SERIAL_PORTS 4
    34.7 -#define SUMMA_PORT	1
    34.8  
    34.9  extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
   34.10 +extern int serial_summa_port;
   34.11  
   34.12  /* network redirectors support */
   34.13  
    35.1 --- a/tools/libxc/xc_core.c	Wed Jun 07 11:03:15 2006 +0100
    35.2 +++ b/tools/libxc/xc_core.c	Wed Jun 07 11:03:51 2006 +0100
    35.3 @@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h
    35.4                                  dumpcore_rtn_t dump_rtn)
    35.5  {
    35.6      unsigned long nr_pages;
    35.7 -    unsigned long *page_array = NULL;
    35.8 +    xen_pfn_t *page_array = NULL;
    35.9      xc_dominfo_t info;
   35.10      int i, nr_vcpus = 0;
   35.11      char *dump_mem, *dump_mem_start = NULL;
   35.12 @@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   35.13          sizeof(vcpu_guest_context_t)*nr_vcpus;
   35.14      dummy_len = (sizeof(struct xc_core_header) +
   35.15                   (sizeof(vcpu_guest_context_t) * nr_vcpus) +
   35.16 -                 (nr_pages * sizeof(unsigned long)));
   35.17 +                 (nr_pages * sizeof(xen_pfn_t)));
   35.18      header.xch_pages_offset = round_pgup(dummy_len);
   35.19  
   35.20      sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header));
   35.21 @@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   35.22      if ( sts != 0 )
   35.23          goto error_out;
   35.24  
   35.25 -    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
   35.26 +    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
   35.27      {
   35.28          printf("Could not allocate memory\n");
   35.29          goto error_out;
   35.30 @@ -91,7 +91,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   35.31          printf("Could not get the page frame list\n");
   35.32          goto error_out;
   35.33      }
   35.34 -    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long));
   35.35 +    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
   35.36      if ( sts != 0 )
   35.37          goto error_out;
   35.38  
    36.1 --- a/tools/libxc/xc_domain.c	Wed Jun 07 11:03:15 2006 +0100
    36.2 +++ b/tools/libxc/xc_domain.c	Wed Jun 07 11:03:51 2006 +0100
    36.3 @@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio
    36.4                                            unsigned long nr_extents,
    36.5                                            unsigned int extent_order,
    36.6                                            unsigned int address_bits,
    36.7 -                                          unsigned long *extent_start)
    36.8 +                                          xen_pfn_t *extent_start)
    36.9  {
   36.10      int err;
   36.11      struct xen_memory_reservation reservation = {
   36.12 @@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio
   36.13                                            uint32_t domid,
   36.14                                            unsigned long nr_extents,
   36.15                                            unsigned int extent_order,
   36.16 -                                          unsigned long *extent_start)
   36.17 +                                          xen_pfn_t *extent_start)
   36.18  {
   36.19      int err;
   36.20      struct xen_memory_reservation reservation = {
   36.21 @@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in
   36.22                                            unsigned long nr_extents,
   36.23                                            unsigned int extent_order,
   36.24                                            unsigned int address_bits,
   36.25 -                                          unsigned long *extent_start)
   36.26 +                                          xen_pfn_t *extent_start)
   36.27  {
   36.28      int err;
   36.29      struct xen_memory_reservation reservation = {
   36.30 @@ -392,8 +392,8 @@ int xc_domain_memory_populate_physmap(in
   36.31  int xc_domain_translate_gpfn_list(int xc_handle,
   36.32                                    uint32_t domid,
   36.33                                    unsigned long nr_gpfns,
   36.34 -                                  unsigned long *gpfn_list,
   36.35 -                                  unsigned long *mfn_list)
   36.36 +                                  xen_pfn_t *gpfn_list,
   36.37 +                                  xen_pfn_t *mfn_list)
   36.38  {
   36.39      struct xen_translate_gpfn_list op = {
   36.40          .domid        = domid,
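
Note: a hedged usage sketch of the retyped reservation interface -- filling a
xen_pfn_t extent list and passing it to xc_domain_memory_populate_physmap with
the signature shown above (error handling trimmed; assumes xenctrl.h declares
the call and xen_pfn_t):

    #include <stdint.h>
    #include <stdlib.h>
    #include <xenctrl.h>  /* assumed: prototype and xen_pfn_t */

    int populate(int xc_handle, uint32_t domid,
                 unsigned long base_pfn, unsigned long nr)
    {
        xen_pfn_t *extents = malloc(nr * sizeof(xen_pfn_t));
        unsigned long i;
        int rc;

        if (extents == NULL)
            return -1;
        for (i = 0; i < nr; i++)
            extents[i] = base_pfn + i;  /* one order-0 extent per entry */
        rc = xc_domain_memory_populate_physmap(xc_handle, domid, nr,
                                               0 /* order */,
                                               0 /* no addr constraint */,
                                               extents);
        free(extents);
        return rc;
    }
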
    37.1 --- a/tools/libxc/xc_hvm_build.c	Wed Jun 07 11:03:15 2006 +0100
    37.2 +++ b/tools/libxc/xc_hvm_build.c	Wed Jun 07 11:03:51 2006 +0100
    37.3 @@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct
    37.4   * hvmloader will use this info to set BIOS accordingly
    37.5   */
    37.6  static int set_hvm_info(int xc_handle, uint32_t dom,
    37.7 -                        unsigned long *pfn_list, unsigned int vcpus,
    37.8 +                        xen_pfn_t *pfn_list, unsigned int vcpus,
    37.9                          unsigned int pae, unsigned int acpi, unsigned int apic)
   37.10  {
   37.11      char *va_map;
   37.12 @@ -178,7 +178,7 @@ static int setup_guest(int xc_handle,
   37.13                         unsigned int store_evtchn,
   37.14                         unsigned long *store_mfn)
   37.15  {
   37.16 -    unsigned long *page_array = NULL;
   37.17 +    xen_pfn_t *page_array = NULL;
   37.18      unsigned long count, i;
   37.19      unsigned long long ptr;
   37.20      xc_mmu_t *mmu = NULL;
   37.21 @@ -223,7 +223,7 @@ static int setup_guest(int xc_handle,
   37.22          goto error_out;
   37.23      }
   37.24  
   37.25 -    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
   37.26 +    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
   37.27      {
   37.28          PERROR("Could not allocate memory.\n");
   37.29          goto error_out;
    38.1 --- a/tools/libxc/xc_ia64_stubs.c	Wed Jun 07 11:03:15 2006 +0100
    38.2 +++ b/tools/libxc/xc_ia64_stubs.c	Wed Jun 07 11:03:51 2006 +0100
    38.3 @@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle,
    38.4  
    38.5  int xc_ia64_get_pfn_list(int xc_handle,
    38.6                           uint32_t domid,
    38.7 -                         unsigned long *pfn_buf,
    38.8 +                         xen_pfn_t *pfn_buf,
    38.9                           unsigned int start_page,
   38.10                           unsigned int nr_pages)
   38.11  {
   38.12 @@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
   38.13      int num_pfns,ret;
   38.14      unsigned int __start_page, __nr_pages;
   38.15      unsigned long max_pfns;
   38.16 -    unsigned long *__pfn_buf;
   38.17 +    xen_pfn_t *__pfn_buf;
   38.18  
   38.19      __start_page = start_page;
   38.20      __nr_pages = nr_pages;
   38.21 @@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
   38.22          set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
   38.23  
   38.24          if ( (max_pfns != -1UL)
   38.25 -            && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 )
   38.26 +            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 )
   38.27          {
   38.28              PERROR("Could not lock pfn list buffer");
   38.29              return -1;
   38.30 @@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
   38.31          ret = do_dom0_op(xc_handle, &op);
   38.32  
   38.33          if (max_pfns != -1UL)
   38.34 -            (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long));
   38.35 +            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
   38.36  
   38.37          if (max_pfns == -1UL)
   38.38              return 0;
   38.39 @@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_
   38.40  {
   38.41      // N.B. gva should be page aligned
   38.42  
   38.43 -    unsigned long *page_array = NULL;
   38.44 +    xen_pfn_t *page_array = NULL;
   38.45      int i;
   38.46  
   38.47 -    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){
   38.48 +    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){
   38.49          PERROR("Could not allocate memory");
   38.50          goto error_out;
   38.51      }
    39.1 --- a/tools/libxc/xc_linux.c	Wed Jun 07 11:03:15 2006 +0100
    39.2 +++ b/tools/libxc/xc_linux.c	Wed Jun 07 11:03:51 2006 +0100
    39.3 @@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle)
    39.4  }
    39.5  
    39.6  void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
    39.7 -                           unsigned long *arr, int num)
    39.8 +                           xen_pfn_t *arr, int num)
    39.9  {
   39.10      privcmd_mmapbatch_t ioctlx;
   39.11      void *addr;
    40.1 --- a/tools/libxc/xc_linux_build.c	Wed Jun 07 11:03:15 2006 +0100
    40.2 +++ b/tools/libxc/xc_linux_build.c	Wed Jun 07 11:03:51 2006 +0100
    40.3 @@ -10,6 +10,7 @@
    40.4  #include "xc_aout9.h"
    40.5  #include <stdlib.h>
    40.6  #include <unistd.h>
    40.7 +#include <inttypes.h>
    40.8  #include <zlib.h>
    40.9  
   40.10  #if defined(__i386__)
   40.11 @@ -136,7 +137,7 @@ static int probeimageformat(const char *
   40.12  int load_initrd(int xc_handle, domid_t dom,
   40.13                  struct initrd_info *initrd,
   40.14                  unsigned long physbase,
   40.15 -                unsigned long *phys_to_mach)
   40.16 +                xen_pfn_t *phys_to_mach)
   40.17  {
   40.18      char page[PAGE_SIZE];
   40.19      unsigned long pfn_start, pfn, nr_pages;
   40.20 @@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle
   40.21                             vcpu_guest_context_t *ctxt,
   40.22                             unsigned long dsi_v_start,
   40.23                             unsigned long v_end,
   40.24 -                           unsigned long *page_array,
   40.25 +                           xen_pfn_t *page_array,
   40.26                             unsigned long vpt_start,
   40.27                             unsigned long vpt_end,
   40.28                             unsigned shadow_mode_enabled)
   40.29 @@ -205,9 +206,9 @@ static int setup_pg_tables(int xc_handle
   40.30      alloc_pt(l2tab, vl2tab, pl2tab);
   40.31      vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
   40.32      if (shadow_mode_enabled)
   40.33 -        ctxt->ctrlreg[3] = pl2tab;
   40.34 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl2tab >> PAGE_SHIFT);
   40.35      else
   40.36 -        ctxt->ctrlreg[3] = l2tab;
   40.37 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l2tab >> PAGE_SHIFT);
   40.38  
   40.39      for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
   40.40      {
   40.41 @@ -251,26 +252,42 @@ static int setup_pg_tables_pae(int xc_ha
   40.42                                 vcpu_guest_context_t *ctxt,
   40.43                                 unsigned long dsi_v_start,
   40.44                                 unsigned long v_end,
   40.45 -                               unsigned long *page_array,
   40.46 +                               xen_pfn_t *page_array,
   40.47                                 unsigned long vpt_start,
   40.48                                 unsigned long vpt_end,
   40.49 -                               unsigned shadow_mode_enabled)
   40.50 +                               unsigned shadow_mode_enabled,
   40.51 +                               unsigned pae_mode)
   40.52  {
   40.53      l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
   40.54      l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
   40.55      l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
   40.56      uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
   40.57 -    unsigned long ppt_alloc, count;
   40.58 +    unsigned long ppt_alloc, count, nmfn;
   40.59  
   40.60      /* First allocate page for page dir. */
   40.61      ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
   40.62  
   40.63 +    if ( pae_mode == PAEKERN_extended_cr3 )
   40.64 +    {
   40.65 +        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
   40.66 +    }
   40.67 +    else if ( page_array[ppt_alloc] > 0xfffff )
   40.68 +    {
   40.69 +        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
   40.70 +        if ( nmfn == 0 )
   40.71 +        {
   40.72 +            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
   40.73 +            goto error_out;
   40.74 +        }
   40.75 +        page_array[ppt_alloc] = nmfn;
   40.76 +    }
   40.77 +
   40.78      alloc_pt(l3tab, vl3tab, pl3tab);
   40.79      vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
   40.80      if (shadow_mode_enabled)
   40.81 -        ctxt->ctrlreg[3] = pl3tab;
   40.82 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl3tab >> PAGE_SHIFT);
   40.83      else
   40.84 -        ctxt->ctrlreg[3] = l3tab;
   40.85 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l3tab >> PAGE_SHIFT);
   40.86  
   40.87      for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
   40.88      {
   40.89 @@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han
   40.90                                vcpu_guest_context_t *ctxt,
   40.91                                unsigned long dsi_v_start,
   40.92                                unsigned long v_end,
   40.93 -                              unsigned long *page_array,
   40.94 +                              xen_pfn_t *page_array,
   40.95                                unsigned long vpt_start,
   40.96                                unsigned long vpt_end,
   40.97                                int shadow_mode_enabled)
   40.98 @@ -361,9 +378,9 @@ static int setup_pg_tables_64(int xc_han
   40.99      alloc_pt(l4tab, vl4tab, pl4tab);
  40.100      vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
  40.101      if (shadow_mode_enabled)
  40.102 -        ctxt->ctrlreg[3] = pl4tab;
  40.103 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl4tab >> PAGE_SHIFT);
  40.104      else
  40.105 -        ctxt->ctrlreg[3] = l4tab;
  40.106 +        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l4tab >> PAGE_SHIFT);
  40.107  
  40.108      for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
  40.109      {
  40.110 @@ -451,7 +468,7 @@ static int setup_guest(int xc_handle,
  40.111                         unsigned int console_evtchn, unsigned long *console_mfn,
  40.112                         uint32_t required_features[XENFEAT_NR_SUBMAPS])
  40.113  {
  40.114 -    unsigned long *page_array = NULL;
  40.115 +    xen_pfn_t *page_array = NULL;
  40.116      struct load_funcs load_funcs;
  40.117      struct domain_setup_info dsi;
  40.118      unsigned long vinitrd_start;
  40.119 @@ -478,7 +495,7 @@ static int setup_guest(int xc_handle,
  40.120  
  40.121      start_page = dsi.v_start >> PAGE_SHIFT;
  40.122      pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
  40.123 -    if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
  40.124 +    if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL )
  40.125      {
  40.126          PERROR("Could not allocate memory");
  40.127          goto error_out;
  40.128 @@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s
  40.129      }
  40.130  
  40.131      if (strstr(xen_caps, "xen-3.0-x86_32p")) {
  40.132 -        if (!dsi->pae_kernel) {
  40.133 +        if (dsi->pae_kernel == PAEKERN_no) {
  40.134              ERROR("Non PAE-kernel on PAE host.");
  40.135              return 0;
  40.136          }
  40.137 -    } else if (dsi->pae_kernel) {
  40.138 +    } else if (dsi->pae_kernel != PAEKERN_no) {
  40.139          ERROR("PAE-kernel on non-PAE host.");
  40.140          return 0;
  40.141      }
  40.142 @@ -606,7 +623,7 @@ static int setup_guest(int xc_handle,
  40.143                         unsigned int console_evtchn, unsigned long *console_mfn,
  40.144                         uint32_t required_features[XENFEAT_NR_SUBMAPS])
  40.145  {
  40.146 -    unsigned long *page_array = NULL;
  40.147 +    xen_pfn_t *page_array = NULL;
  40.148      unsigned long count, i, hypercall_pfn;
  40.149      start_info_t *start_info;
  40.150      shared_info_t *shared_info;
  40.151 @@ -617,7 +634,7 @@ static int setup_guest(int xc_handle,
  40.152  
  40.153      unsigned long nr_pt_pages;
  40.154      unsigned long physmap_pfn;
  40.155 -    unsigned long *physmap, *physmap_e;
  40.156 +    xen_pfn_t *physmap, *physmap_e;
  40.157  
  40.158      struct load_funcs load_funcs;
  40.159      struct domain_setup_info dsi;
  40.160 @@ -673,7 +690,8 @@ static int setup_guest(int xc_handle,
  40.161  
  40.162      for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
  40.163      {
  40.164 -        if ( (supported_features[i]&required_features[i]) != required_features[i] )
  40.165 +        if ( (supported_features[i] & required_features[i]) !=
  40.166 +             required_features[i] )
  40.167          {
  40.168              ERROR("Guest kernel does not support a required feature.");
  40.169              goto error_out;
  40.170 @@ -719,7 +737,7 @@ static int setup_guest(int xc_handle,
  40.171      (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
  40.172      ((_l) & ~((1UL<<(_s))-1))) >> (_s))
  40.173  #if defined(__i386__)
  40.174 -        if ( dsi.pae_kernel )
  40.175 +        if ( dsi.pae_kernel != PAEKERN_no )
  40.176          {
  40.177              if ( (1 + /* # L3 */
  40.178                    NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
  40.179 @@ -797,11 +815,11 @@ static int setup_guest(int xc_handle,
  40.180  
  40.181      /* setup page tables */
  40.182  #if defined(__i386__)
  40.183 -    if (dsi.pae_kernel)
  40.184 +    if (dsi.pae_kernel != PAEKERN_no)
  40.185          rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
  40.186                                   dsi.v_start, v_end,
  40.187                                   page_array, vpt_start, vpt_end,
  40.188 -                                 shadow_mode_enabled);
  40.189 +                                 shadow_mode_enabled, dsi.pae_kernel);
  40.190      else
  40.191          rc = setup_pg_tables(xc_handle, dom, ctxt,
  40.192                               dsi.v_start, v_end,
  40.193 @@ -824,16 +842,16 @@ static int setup_guest(int xc_handle,
  40.194       */
  40.195      if ( !shadow_mode_enabled )
  40.196      {
  40.197 -        if ( dsi.pae_kernel )
  40.198 +        if ( dsi.pae_kernel != PAEKERN_no )
  40.199          {
  40.200              if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
  40.201 -                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
  40.202 +                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
  40.203                  goto error_out;
  40.204          }
  40.205          else
  40.206          {
  40.207              if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
  40.208 -                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
  40.209 +                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
  40.210                  goto error_out;
  40.211          }
  40.212      }
  40.213 @@ -845,7 +863,7 @@ static int setup_guest(int xc_handle,
  40.214       * correct protection for the page
  40.215       */
  40.216      if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
  40.217 -                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
  40.218 +                   xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
  40.219          goto error_out;
  40.220  #endif
  40.221  
  40.222 @@ -865,8 +883,8 @@ static int setup_guest(int xc_handle,
  40.223              ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
  40.224              count) )
  40.225          {
  40.226 -            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
  40.227 -                    count, page_array[count]);
  40.228 +            fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n",
  40.229 +                    count, (uint64_t)page_array[count]);
  40.230              munmap(physmap, PAGE_SIZE);
  40.231              goto error_out;
  40.232          }
  40.233 @@ -958,7 +976,7 @@ static int setup_guest(int xc_handle,
  40.234      rc = xc_version(xc_handle, XENVER_version, NULL);
  40.235      sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
  40.236              rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
  40.237 -            dsi.pae_kernel ? "p" : "");
  40.238 +            (dsi.pae_kernel != PAEKERN_no) ? "p" : "");
  40.239      start_info->nr_pages     = nr_pages;
  40.240      start_info->shared_info  = guest_shared_info_mfn << PAGE_SHIFT;
  40.241      start_info->flags        = flags;
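
Note on the xen_pfn_to_cr3/xen_cr3_to_pfn conversions used above: a plain
pfn << PAGE_SHIFT cannot be stored in a 32-bit CR3 once the MFN exceeds 2^20,
so the extended-cr3 format packs the pfn differently. The macros below are
assumed to mirror Xen's 32-bit public headers of this era; the demo shows the
round trip for an MFN above the 4GB boundary:

    #include <stdio.h>

    /* Assumed to match xen's public arch-x86_32 header definitions. */
    #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
    #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))

    int main(void)
    {
        unsigned pfn = 0x123456;  /* MFN above 2^20, i.e. frame above 4GB */
        unsigned cr3 = xen_pfn_to_cr3(pfn);
        printf("pfn %#x -> cr3 %#x -> pfn %#x\n",
               pfn, cr3, xen_cr3_to_pfn(cr3));  /* round-trips intact */
        return 0;
    }
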
    41.1 --- a/tools/libxc/xc_linux_restore.c	Wed Jun 07 11:03:15 2006 +0100
    41.2 +++ b/tools/libxc/xc_linux_restore.c	Wed Jun 07 11:03:51 2006 +0100
    41.3 @@ -25,10 +25,10 @@ static unsigned int pt_levels;
    41.4  static unsigned long max_pfn;
    41.5  
    41.6  /* Live mapping of the table mapping each PFN to its current MFN. */
    41.7 -static unsigned long *live_p2m = NULL;
    41.8 +static xen_pfn_t *live_p2m = NULL;
    41.9  
   41.10  /* A table mapping each PFN to its new MFN. */
   41.11 -static unsigned long *p2m = NULL;
   41.12 +static xen_pfn_t *p2m = NULL;
   41.13  
   41.14  
   41.15  static ssize_t
   41.16 @@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int 
   41.17                       unsigned int console_evtchn, unsigned long *console_mfn)
   41.18  {
   41.19      DECLARE_DOM0_OP;
   41.20 -    int rc = 1, i, n;
   41.21 +    int rc = 1, i, n, pae_extended_cr3 = 0;
   41.22      unsigned long mfn, pfn;
   41.23      unsigned int prev_pc, this_pc;
   41.24      int verify = 0;
   41.25 @@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int 
   41.26      unsigned long *pfn_type = NULL;
   41.27  
   41.28      /* A table of MFNs to map in the current region */
   41.29 -    unsigned long *region_mfn = NULL;
   41.30 +    xen_pfn_t *region_mfn = NULL;
   41.31  
   41.32      /* Types of the pfns in the current region */
   41.33      unsigned long region_pfn_type[MAX_BATCH_SIZE];
   41.34 @@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int 
   41.35      unsigned long *page = NULL;
   41.36  
   41.37      /* A copy of the pfn-to-mfn table frame list. */
   41.38 -    unsigned long *p2m_frame_list = NULL;
   41.39 +    xen_pfn_t *p2m_frame_list = NULL;
   41.40  
   41.41      /* A temporary mapping of the guest's start_info page. */
   41.42      start_info_t *start_info;
   41.43 @@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int 
   41.44          return 1;
   41.45      }
   41.46  
   41.47 -
   41.48      if (mlock(&ctxt, sizeof(ctxt))) {
   41.49          /* needed for build dom0 op, but might as well do early */
   41.50          ERR("Unable to mlock ctxt");
   41.51          return 1;
   41.52      }
   41.53  
   41.54 -
   41.55 -    /* Read the saved P2M frame list */
   41.56 -    if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
   41.57 +    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
   41.58          ERR("Couldn't allocate p2m_frame_list array");
   41.59          goto out;
   41.60      }
   41.61  
   41.62 -    if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
   41.63 +    /* Read first entry of P2M list, or extended-info signature (~0UL). */
   41.64 +    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
   41.65 +        ERR("read extended-info signature failed");
   41.66 +        goto out;
   41.67 +    }
   41.68 +
   41.69 +    if (p2m_frame_list[0] == ~0UL) {
   41.70 +        uint32_t tot_bytes;
   41.71 +
   41.72 +        /* Next 4 bytes: total size of following extended info. */
   41.73 +        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
   41.74 +            ERR("read extended-info size failed");
   41.75 +            goto out;
   41.76 +        }
   41.77 +
   41.78 +        while (tot_bytes) {
   41.79 +            uint32_t chunk_bytes;
   41.80 +            char     chunk_sig[4];
   41.81 +
   41.82 +            /* 4-character chunk signature + 4-byte remaining chunk size. */
   41.83 +            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
   41.84 +                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
   41.85 +                ERR("read extended-info chunk signature failed");
   41.86 +                goto out;
   41.87 +            }
   41.88 +            tot_bytes -= 8;
   41.89 +
   41.90 +            /* VCPU context structure? */
   41.91 +            if (!strncmp(chunk_sig, "vcpu", 4)) {
   41.92 +                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
   41.93 +                    ERR("read extended-info vcpu context failed");
   41.94 +                    goto out;
   41.95 +                }
   41.96 +                tot_bytes   -= sizeof(struct vcpu_guest_context);
   41.97 +                chunk_bytes -= sizeof(struct vcpu_guest_context);
   41.98 +
   41.99 +                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
  41.100 +                    pae_extended_cr3 = 1;
  41.101 +            }
  41.102 +
  41.103 +            /* Any remaining bytes of this chunk: read and discard. */
  41.104 +            while (chunk_bytes) {
  41.105 +                unsigned long sz = chunk_bytes;
  41.106 +                if ( sz > P2M_FL_SIZE )
  41.107 +                    sz = P2M_FL_SIZE;
  41.108 +                if (!read_exact(io_fd, p2m_frame_list, sz)) {
  41.109 +                    ERR("read-and-discard extended-info chunk bytes failed");
  41.110 +                    goto out;
  41.111 +                }
  41.112 +                chunk_bytes -= sz;
  41.113 +                tot_bytes   -= sz;
  41.114 +            }
  41.115 +        }
  41.116 +
  41.117 +        /* Now read the real first entry of P2M list. */
  41.118 +        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
  41.119 +            ERR("read first entry of p2m_frame_list failed");
  41.120 +            goto out;
  41.121 +        }
  41.122 +    }
  41.123 +
  41.124 +    /* First entry is already read into the p2m array. */
  41.125 +    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
  41.126          ERR("read p2m_frame_list failed");
  41.127          goto out;
  41.128      }
  41.129  
  41.130 -
  41.131      /* We want zeroed memory so use calloc rather than malloc. */
  41.132 -    p2m        = calloc(max_pfn, sizeof(unsigned long));
  41.133 +    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
  41.134      pfn_type   = calloc(max_pfn, sizeof(unsigned long));
  41.135 -    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long));
  41.136 +    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
  41.137  
  41.138      if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
  41.139          ERR("memory alloc failed");
  41.140 @@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int 
  41.141          goto out;
  41.142      }
  41.143  
  41.144 -    if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) {
  41.145 +    if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
  41.146          ERR("Could not mlock region_mfn");
  41.147          goto out;
  41.148      }
  41.149 @@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int 
  41.150                  ** A page table page - need to 'uncanonicalize' it, i.e.
  41.151                  ** replace all the references to pfns with the corresponding
  41.152                  ** mfns for the new domain.
  41.153 +                **
  41.154 +                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
  41.155 +                ** so we may need to update the p2m after the main loop.
  41.156 +                ** Hence we defer canonicalization of L1s until then.
  41.157                  */
  41.158 -                if(!uncanonicalize_pagetable(pagetype, page)) {
  41.159 -                    /*
  41.160 -                    ** Failing to uncanonicalize a page table can be ok
  41.161 -                    ** under live migration since the pages type may have
  41.162 -                    ** changed by now (and we'll get an update later).
  41.163 -                    */
  41.164 -                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
  41.165 -                            pagetype >> 28, pfn, mfn);
  41.166 -                    nraces++;
  41.167 -                    continue;
  41.168 +                if ((pt_levels != 3) ||
  41.169 +                    pae_extended_cr3 ||
  41.170 +                    (pagetype != L1TAB)) {
  41.171 +
  41.172 +                    if (!uncanonicalize_pagetable(pagetype, page)) {
  41.173 +                        /*
  41.174 +                        ** Failing to uncanonicalize a page table can be ok
  41.175 +                        ** under live migration since the pages type may have
  41.176 +                        ** changed by now (and we'll get an update later).
  41.177 +                        */
  41.178 +                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
  41.179 +                                pagetype >> 28, pfn, mfn);
  41.180 +                        nraces++;
  41.181 +                        continue;
  41.182 +                    }
  41.183 +
  41.184                  }
  41.185  
  41.186              } else if(pagetype != NOTAB) {
  41.187 @@ -390,6 +458,100 @@ int xc_linux_restore(int xc_handle, int 
  41.188  
  41.189      DPRINTF("Received all pages (%d races)\n", nraces);
  41.190  
  41.191 +    if ((pt_levels == 3) && !pae_extended_cr3) {
  41.192 +
  41.193 +        /*
  41.194 +        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
  41.195 +        ** is a little awkward and involves (a) finding all such PGDs and
  41.196 +        ** replacing them with 'lowmem' versions; (b) updating the p2m[]
  41.197 +        ** with the new info; and (c) canonicalizing all the L1s using the
  41.198 +        ** (potentially updated) p2m[].
  41.199 +        **
  41.200 +        ** This is relatively slow (and currently involves two passes through
  41.201 +        ** the pfn_type[] array), but at least seems to be correct. May wish
  41.202 +        ** to consider more complex approaches to optimize this later.
  41.203 +        */
  41.204 +
  41.205 +        int j, k;
  41.206 +
  41.207 +        /* First pass: find all L3TABs currently in >4G MFNs and get new MFNs */
  41.208 +        for (i = 0; i < max_pfn; i++) {
  41.209 +
  41.210 +            if (((pfn_type[i] & LTABTYPE_MASK) == L3TAB) && (p2m[i] > 0xfffffUL)) {
  41.211 +
  41.212 +                unsigned long new_mfn;
  41.213 +                uint64_t l3ptes[4];
  41.214 +                uint64_t *l3tab;
  41.215 +
  41.216 +                l3tab = (uint64_t *)
  41.217 +                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  41.218 +                                         PROT_READ, p2m[i]);
  41.219 +
  41.220 +                for(j = 0; j < 4; j++)
  41.221 +                    l3ptes[j] = l3tab[j];
  41.222 +
  41.223 +                munmap(l3tab, PAGE_SIZE);
  41.224 +
  41.225 +                if (!(new_mfn = xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
  41.226 +                    ERR("Couldn't get a page below 4GB :-(");
  41.227 +                    goto out;
  41.228 +                }
  41.229 +
  41.230 +                p2m[i] = new_mfn;
  41.231 +                if (xc_add_mmu_update(xc_handle, mmu,
  41.232 +                                      (((unsigned long long)new_mfn)
  41.233 +                                       << PAGE_SHIFT) |
  41.234 +                                      MMU_MACHPHYS_UPDATE, i)) {
  41.235 +                    ERR("Couldn't m2p on PAE root pgdir");
  41.236 +                    goto out;
  41.237 +                }
  41.238 +
  41.239 +                l3tab = (uint64_t *)
  41.240 +                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
  41.241 +                                         PROT_READ | PROT_WRITE, p2m[i]);
  41.242 +
  41.243 +                for(j = 0; j < 4; j++)
  41.244 +                    l3tab[j] = l3ptes[j];
  41.245 +
  41.246 +                munmap(l3tab, PAGE_SIZE);
  41.247 +
  41.248 +            }
  41.249 +        }
  41.250 +
  41.251 +        /* Second pass: find all L1TABs and uncanonicalize them */
  41.252 +        j = 0;
  41.253 +
  41.254 +        for(i = 0; i < max_pfn; i++) {
  41.255 +
  41.256 +            if ((pfn_type[i] & LTABTYPE_MASK) == L1TAB) {
  41.257 +                region_mfn[j] = p2m[i];
  41.258 +                j++;
  41.259 +            }
  41.260 +
  41.261 +            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
  41.262 +
  41.263 +                if (!(region_base = xc_map_foreign_batch(
  41.264 +                          xc_handle, dom, PROT_READ | PROT_WRITE,
  41.265 +                          region_mfn, j))) {
  41.266 +                    ERR("map batch failed");
  41.267 +                    goto out;
  41.268 +                }
  41.269 +
  41.270 +                for(k = 0; k < j; k++) {
  41.271 +                    if(!uncanonicalize_pagetable(L1TAB,
  41.272 +                                                 region_base + k*PAGE_SIZE)) {
  41.273 +                        ERR("failed uncanonicalize pt!");
  41.274 +                        goto out;
  41.275 +                    }
  41.276 +                }
  41.277 +
  41.278 +                munmap(region_base, j*PAGE_SIZE);
  41.279 +                j = 0;
  41.280 +            }
  41.281 +        }
  41.282 +
  41.283 +    }
  41.284 +
  41.285  
  41.286      if (xc_finish_mmu_updates(xc_handle, mmu)) {
  41.287          ERR("Error doing finish_mmu_updates()");
  41.288 @@ -536,7 +698,7 @@ int xc_linux_restore(int xc_handle, int 
  41.289      }
  41.290  
  41.291      /* Uncanonicalise the page table base pointer. */
  41.292 -    pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
  41.293 +    pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
  41.294  
  41.295      if (pfn >= max_pfn) {
  41.296          ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
  41.297 @@ -552,7 +714,7 @@ int xc_linux_restore(int xc_handle, int 
  41.298          goto out;
  41.299      }
  41.300  
  41.301 -    ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT;
  41.302 +    ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
  41.303  
  41.304      /* clear any pending events and the selector */
  41.305      memset(&(shared_info->evtchn_pending[0]), 0,
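
Note: the 0xfffffUL test in the first pass above encodes the constraint that,
without extended-cr3, a PAE guest's page-directory-pointer table must sit
below 4GB: a legacy 32-bit CR3 can only address frames with MFN < 2^20. As a
one-line sketch of the relocation predicate:

    #include <stdint.h>

    /* Sketch: does this PAE PGD frame need a below-4GB replacement? */
    static int pgd_needs_relocation(uint64_t mfn, int pae_extended_cr3)
    {
        return !pae_extended_cr3 && mfn > 0xfffffULL;  /* MFN >= 2^20 */
    }
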
    42.1 --- a/tools/libxc/xc_linux_save.c	Wed Jun 07 11:03:15 2006 +0100
    42.2 +++ b/tools/libxc/xc_linux_save.c	Wed Jun 07 11:03:51 2006 +0100
    42.3 @@ -40,10 +40,10 @@ static unsigned int pt_levels;
    42.4  static unsigned long max_pfn;
    42.5  
    42.6  /* Live mapping of the table mapping each PFN to its current MFN. */
    42.7 -static unsigned long *live_p2m = NULL;
    42.8 +static xen_pfn_t *live_p2m = NULL;
    42.9  
   42.10  /* Live mapping of system MFN to PFN table. */
   42.11 -static unsigned long *live_m2p = NULL;
   42.12 +static xen_pfn_t *live_m2p = NULL;
   42.13  
   42.14  /* grep fodder: machine_to_phys */
   42.15  
   42.16 @@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon
   42.17  
   42.18  
   42.19  
   42.20 -static unsigned long *xc_map_m2p(int xc_handle,
   42.21 +static xen_pfn_t *xc_map_m2p(int xc_handle,
   42.22                                   unsigned long max_mfn,
   42.23                                   int prot)
   42.24  {
   42.25      struct xen_machphys_mfn_list xmml;
   42.26      privcmd_mmap_entry_t *entries;
   42.27      unsigned long m2p_chunks, m2p_size;
   42.28 -    unsigned long *m2p;
   42.29 -    unsigned long *extent_start;
   42.30 +    xen_pfn_t *m2p;
   42.31 +    xen_pfn_t *extent_start;
   42.32      int i, rc;
   42.33  
   42.34      m2p_size   = M2P_SIZE(max_mfn);
   42.35      m2p_chunks = M2P_CHUNKS(max_mfn);
   42.36  
   42.37      xmml.max_extents = m2p_chunks;
   42.38 -    if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) {
   42.39 +    if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) {
   42.40          ERR("failed to allocate space for m2p mfns");
   42.41          return NULL;
   42.42      }
   42.43 @@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_
   42.44      char page[PAGE_SIZE];
   42.45  
   42.46      /* Double and single indirect references to the live P2M table */
   42.47 -    unsigned long *live_p2m_frame_list_list = NULL;
   42.48 -    unsigned long *live_p2m_frame_list = NULL;
   42.49 +    xen_pfn_t *live_p2m_frame_list_list = NULL;
   42.50 +    xen_pfn_t *live_p2m_frame_list = NULL;
   42.51  
   42.52      /* A copy of the pfn-to-mfn table frame list. */
   42.53 -    unsigned long *p2m_frame_list = NULL;
   42.54 +    xen_pfn_t *p2m_frame_list = NULL;
   42.55  
   42.56      /* Live mapping of shared info structure */
   42.57      shared_info_t *live_shinfo = NULL;
   42.58 @@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_
   42.59      memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
   42.60  
   42.61      /* Canonicalise the pfn-to-mfn table frame-number list. */
   42.62 -    for (i = 0; i < max_pfn; i += ulpp) {
   42.63 -        if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) {
   42.64 +    for (i = 0; i < max_pfn; i += fpp) {
   42.65 +        if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
   42.66              ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
   42.67 -            ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp,
   42.68 -                p2m_frame_list[i/ulpp]);
   42.69 +            ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
   42.70 +                (uint64_t)p2m_frame_list[i/fpp]);
   42.71              goto out;
   42.72          }
   42.73      }
   42.74 @@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_
   42.75  
   42.76      /* Start writing out the saved-domain record. */
   42.77  
   42.78 -    if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
   42.79 +    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
   42.80          ERR("write: max_pfn");
   42.81          goto out;
   42.82      }
   42.83  
   42.84 -    if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
   42.85 +    /*
   42.86 +     * Write an extended-info structure to inform the restore code that
   42.87 +     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
   42.88 +     * slow paths in the restore code.
   42.89 +     */
   42.90 +    if ((pt_levels == 3) &&
   42.91 +        (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) {
   42.92 +        unsigned long signature = ~0UL;
   42.93 +        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
   42.94 +        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
   42.95 +        char chunk_sig[]  = "vcpu";
   42.96 +        if (!write_exact(io_fd, &signature, sizeof(signature)) ||
   42.97 +            !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
   42.98 +            !write_exact(io_fd, &chunk_sig, 4) ||
   42.99 +            !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
  42.100 +            !write_exact(io_fd, &ctxt,      sizeof(ctxt))) {
  42.101 +            ERR("write: extended info");
  42.102 +            goto out;
  42.103 +        }
  42.104 +    }
  42.105 +
  42.106 +    if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
  42.107          ERR("write: p2m_frame_list");
  42.108          goto out;
  42.109      }
  42.110 @@ -1129,12 +1150,12 @@ int xc_linux_save(int xc_handle, int io_
  42.111      }
  42.112  
  42.113      /* Canonicalise the page table base pointer. */
  42.114 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) {
  42.115 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) {
  42.116          ERR("PT base is not in range of pseudophys map");
  42.117          goto out;
  42.118      }
  42.119 -    ctxt.ctrlreg[3] = mfn_to_pfn(ctxt.ctrlreg[3] >> PAGE_SHIFT) <<
  42.120 -        PAGE_SHIFT;
  42.121 +    ctxt.ctrlreg[3] =
  42.122 +        xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
  42.123  
  42.124      if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) ||
  42.125          !write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
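
Note: for reference, the optional extended-info block written above (and
parsed by the xc_linux_restore.c hunk earlier) lays out as follows. This is a
descriptive sketch derived from the write_exact calls, not a struct the code
declares:

    /* Extended-info block in the save image, in stream order:
     *
     *   unsigned long signature;   // ~0UL, where the first p2m frame
     *                              // list entry would otherwise sit
     *   uint32_t      tot_bytes;   // total size of all chunks below
     *   // then, repeated while tot_bytes > 0:
     *   char          chunk_sig[4];          // e.g. "vcpu"
     *   uint32_t      chunk_bytes;           // payload size
     *   char          payload[chunk_bytes];  // "vcpu": a full
     *                                        // vcpu_guest_context_t
     */
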
    43.1 --- a/tools/libxc/xc_load_aout9.c	Wed Jun 07 11:03:15 2006 +0100
    43.2 +++ b/tools/libxc/xc_load_aout9.c	Wed Jun 07 11:03:51 2006 +0100
    43.3 @@ -17,7 +17,7 @@
    43.4  #define KOFFSET(_p)       ((_p)&~KZERO)
    43.5  
    43.6  static int parseaout9image(const char *, unsigned long, struct domain_setup_info *);
    43.7 -static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *);
    43.8 +static int loadaout9image(const char *, unsigned long, int, uint32_t, xen_pfn_t *, struct domain_setup_info *);
    43.9  static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int);
   43.10  struct Exec *get_header(const char *, unsigned long, struct Exec *);
   43.11  
   43.12 @@ -79,7 +79,7 @@ loadaout9image(
   43.13      const char *image,
   43.14      unsigned long image_size,
   43.15      int xch, uint32_t dom,
   43.16 -    unsigned long *parray,
   43.17 +    xen_pfn_t *parray,
   43.18      struct domain_setup_info *dsi)
   43.19  {
   43.20      struct Exec ehdr;
    44.1 --- a/tools/libxc/xc_load_bin.c	Wed Jun 07 11:03:15 2006 +0100
    44.2 +++ b/tools/libxc/xc_load_bin.c	Wed Jun 07 11:03:51 2006 +0100
    44.3 @@ -107,7 +107,7 @@ parsebinimage(
    44.4  static int
    44.5  loadbinimage(
    44.6      const char *image, unsigned long image_size, int xch, uint32_t dom,
    44.7 -    unsigned long *parray, struct domain_setup_info *dsi);
    44.8 +    xen_pfn_t *parray, struct domain_setup_info *dsi);
    44.9  
   44.10  int probe_bin(const char *image,
   44.11                unsigned long image_size,
   44.12 @@ -235,7 +235,7 @@ static int parsebinimage(const char *ima
   44.13  static int
   44.14  loadbinimage(
   44.15      const char *image, unsigned long image_size, int xch, uint32_t dom,
   44.16 -    unsigned long *parray, struct domain_setup_info *dsi)
   44.17 +    xen_pfn_t *parray, struct domain_setup_info *dsi)
   44.18  {
   44.19      unsigned long size;
   44.20      char         *va;
    45.1 --- a/tools/libxc/xc_load_elf.c	Wed Jun 07 11:03:15 2006 +0100
    45.2 +++ b/tools/libxc/xc_load_elf.c	Wed Jun 07 11:03:51 2006 +0100
    45.3 @@ -16,10 +16,10 @@ parseelfimage(
    45.4  static int
    45.5  loadelfimage(
    45.6      const char *image, unsigned long image_size, int xch, uint32_t dom,
    45.7 -    unsigned long *parray, struct domain_setup_info *dsi);
    45.8 +    xen_pfn_t *parray, struct domain_setup_info *dsi);
    45.9  static int
   45.10  loadelfsymtab(
   45.11 -    const char *image, int xch, uint32_t dom, unsigned long *parray,
   45.12 +    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
   45.13      struct domain_setup_info *dsi);
   45.14  
   45.15  int probe_elf(const char *image,
   45.16 @@ -122,8 +122,15 @@ static int parseelfimage(const char *ima
   45.17              ERROR("Actually saw: '%s'", guestinfo);
   45.18              return -EINVAL;
   45.19          }
   45.20 -        if ( (strstr(guestinfo, "PAE=yes") != NULL) )
   45.21 -            dsi->pae_kernel = 1;
   45.22 +
   45.23 +        dsi->pae_kernel = PAEKERN_no;
   45.24 +        p = strstr(guestinfo, "PAE=yes");
   45.25 +        if ( p != NULL )
   45.26 +        {
   45.27 +            dsi->pae_kernel = PAEKERN_yes;
   45.28 +            if ( !strncmp(p+7, "[extended-cr3]", 14) )
   45.29 +                dsi->pae_kernel = PAEKERN_extended_cr3;
   45.30 +        }
   45.31  
   45.32          break;
   45.33      }
   45.34 @@ -204,7 +211,7 @@ static int parseelfimage(const char *ima
   45.35  static int
   45.36  loadelfimage(
   45.37      const char *image, unsigned long elfsize, int xch, uint32_t dom,
   45.38 -    unsigned long *parray, struct domain_setup_info *dsi)
   45.39 +    xen_pfn_t *parray, struct domain_setup_info *dsi)
   45.40  {
   45.41      Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
   45.42      Elf_Phdr *phdr;
   45.43 @@ -258,7 +265,7 @@ loadelfimage(
   45.44  
   45.45  static int
   45.46  loadelfsymtab(
   45.47 -    const char *image, int xch, uint32_t dom, unsigned long *parray,
   45.48 +    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
   45.49      struct domain_setup_info *dsi)
   45.50  {
   45.51      Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr;
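
parseelfimage() now distinguishes three PAE capability levels in the guest-info string, recording whether a PAE kernel also advertises the extended-CR3 format. A standalone, runnable sketch of the same parse, using the PAEKERN_* values defined in xg_private.h further down:

    #include <stdio.h>
    #include <string.h>

    #define PAEKERN_no           0
    #define PAEKERN_yes          1
    #define PAEKERN_extended_cr3 2

    static int parse_pae(const char *guestinfo)
    {
        const char *p = strstr(guestinfo, "PAE=yes");
        if (p == NULL)
            return PAEKERN_no;                         /* "PAE=no" or absent */
        if (strncmp(p + 7, "[extended-cr3]", 14) == 0) /* 7 == strlen("PAE=yes") */
            return PAEKERN_extended_cr3;
        return PAEKERN_yes;
    }

    int main(void)
    {
        printf("%d\n", parse_pae("LOADER=generic,PAE=yes[extended-cr3]")); /* 2 */
        printf("%d\n", parse_pae("LOADER=generic,PAE=yes"));               /* 1 */
        printf("%d\n", parse_pae("LOADER=generic,PAE=no"));                /* 0 */
        return 0;
    }
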
    46.1 --- a/tools/libxc/xc_pagetab.c	Wed Jun 07 11:03:15 2006 +0100
    46.2 +++ b/tools/libxc/xc_pagetab.c	Wed Jun 07 11:03:51 2006 +0100
    46.3 @@ -78,7 +78,7 @@ unsigned long xc_translate_foreign_addre
     46.4          fprintf(stderr, "failed to retrieve vcpu context\n");
    46.5          goto out;
    46.6      }
    46.7 -    cr3 = ctx.ctrlreg[3];
    46.8 +    cr3 = ((unsigned long long)xen_cr3_to_pfn(ctx.ctrlreg[3])) << PAGE_SHIFT;
    46.9  
   46.10      /* Page Map Level 4 */
   46.11  
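
cr3 is no longer a raw physical address pulled straight out of the vcpu context: the new xen_cr3_to_pfn()/xen_pfn_to_cr3() macros own the packing of a frame number into ctrlreg[3], which matters once PAE's extended-CR3 format squeezes a wide frame number into the register. A runnable sketch of the simple 64-bit-style encoding only, with assumed macro bodies (the canonical, per-architecture versions live in the public arch headers; the 32-bit PAE variant packs the high bits differently):

    #include <stdio.h>

    /* Assumed illustrations, not the canonical macros. */
    #define demo_pfn_to_cr3(pfn) ((unsigned long long)(pfn) << 12)
    #define demo_cr3_to_pfn(cr3) ((unsigned long long)(cr3) >> 12)

    int main(void)
    {
        unsigned long long cr3 = demo_pfn_to_cr3(0x1abcdULL);
        /* The old code shifted by PAGE_SHIFT directly; going through the
         * macro keeps tools correct when the encoding is not a plain shift. */
        printf("pfn=%llx\n", demo_cr3_to_pfn(cr3));   /* prints pfn=1abcd */
        return 0;
    }
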
    47.1 --- a/tools/libxc/xc_private.c	Wed Jun 07 11:03:15 2006 +0100
    47.2 +++ b/tools/libxc/xc_private.c	Wed Jun 07 11:03:51 2006 +0100
    47.3 @@ -4,6 +4,7 @@
    47.4   * Helper functions for the rest of the library.
    47.5   */
    47.6  
    47.7 +#include <inttypes.h>
    47.8  #include "xc_private.h"
    47.9  
   47.10  /* NB: arr must be mlock'ed */
   47.11 @@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle,
   47.12      struct xen_memory_reservation *reservation = arg;
   47.13      struct xen_machphys_mfn_list *xmml = arg;
   47.14      struct xen_translate_gpfn_list *trans = arg;
   47.15 -    unsigned long *extent_start;
   47.16 -    unsigned long *gpfn_list;
   47.17 -    unsigned long *mfn_list;
   47.18 +    xen_pfn_t *extent_start;
   47.19 +    xen_pfn_t *gpfn_list;
   47.20 +    xen_pfn_t *mfn_list;
   47.21      long ret = -EINVAL;
   47.22  
   47.23      hypercall.op     = __HYPERVISOR_memory_op;
   47.24 @@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle,
   47.25          get_xen_guest_handle(extent_start, reservation->extent_start);
   47.26          if ( (extent_start != NULL) &&
   47.27               (mlock(extent_start,
   47.28 -                    reservation->nr_extents * sizeof(unsigned long)) != 0) )
   47.29 +                    reservation->nr_extents * sizeof(xen_pfn_t)) != 0) )
   47.30          {
   47.31              PERROR("Could not mlock");
   47.32              safe_munlock(reservation, sizeof(*reservation));
   47.33 @@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle,
   47.34          }
   47.35          get_xen_guest_handle(extent_start, xmml->extent_start);
   47.36          if ( mlock(extent_start,
   47.37 -                   xmml->max_extents * sizeof(unsigned long)) != 0 )
   47.38 +                   xmml->max_extents * sizeof(xen_pfn_t)) != 0 )
   47.39          {
   47.40              PERROR("Could not mlock");
   47.41              safe_munlock(xmml, sizeof(*xmml));
   47.42 @@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle,
   47.43              goto out1;
   47.44          }
   47.45          get_xen_guest_handle(gpfn_list, trans->gpfn_list);
   47.46 -        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
   47.47 +        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
   47.48          {
   47.49              PERROR("Could not mlock");
   47.50              safe_munlock(trans, sizeof(*trans));
   47.51              goto out1;
   47.52          }
   47.53          get_xen_guest_handle(mfn_list, trans->mfn_list);
   47.54 -        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
   47.55 +        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
   47.56          {
   47.57              PERROR("Could not mlock");
   47.58 -            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
   47.59 +            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
   47.60              safe_munlock(trans, sizeof(*trans));
   47.61              goto out1;
   47.62          }
   47.63 @@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle,
   47.64          get_xen_guest_handle(extent_start, reservation->extent_start);
   47.65          if ( extent_start != NULL )
   47.66              safe_munlock(extent_start,
   47.67 -                         reservation->nr_extents * sizeof(unsigned long));
   47.68 +                         reservation->nr_extents * sizeof(xen_pfn_t));
   47.69          break;
   47.70      case XENMEM_machphys_mfn_list:
   47.71          safe_munlock(xmml, sizeof(*xmml));
   47.72          get_xen_guest_handle(extent_start, xmml->extent_start);
   47.73          safe_munlock(extent_start,
   47.74 -                     xmml->max_extents * sizeof(unsigned long));
   47.75 +                     xmml->max_extents * sizeof(xen_pfn_t));
   47.76          break;
   47.77      case XENMEM_add_to_physmap:
   47.78          safe_munlock(arg, sizeof(struct xen_add_to_physmap));
   47.79          break;
   47.80      case XENMEM_translate_gpfn_list:
   47.81              get_xen_guest_handle(mfn_list, trans->mfn_list);
   47.82 -            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long));
   47.83 +            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
   47.84              get_xen_guest_handle(gpfn_list, trans->gpfn_list);
   47.85 -            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
   47.86 +            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
   47.87              safe_munlock(trans, sizeof(*trans));
   47.88          break;
   47.89      }
   47.90 @@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x
   47.91  
   47.92  int xc_get_pfn_list(int xc_handle,
   47.93                      uint32_t domid,
   47.94 -                    unsigned long *pfn_buf,
   47.95 +                    xen_pfn_t *pfn_buf,
   47.96                      unsigned long max_pfns)
   47.97  {
   47.98      DECLARE_DOM0_OP;
   47.99 @@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle,
  47.100      set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf);
  47.101  
  47.102  #ifdef VALGRIND
  47.103 -    memset(pfn_buf, 0, max_pfns * sizeof(unsigned long));
  47.104 +    memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t));
  47.105  #endif
  47.106  
  47.107 -    if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
  47.108 +    if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 )
  47.109      {
  47.110          PERROR("xc_get_pfn_list: pfn_buf mlock failed");
  47.111          return -1;
  47.112 @@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle,
  47.113  
  47.114      ret = do_dom0_op(xc_handle, &op);
  47.115  
  47.116 -    safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long));
  47.117 +    safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t));
  47.118  
  47.119  #if 0
  47.120  #ifdef DEBUG
  47.121 @@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd)
  47.122  }
  47.123  
  47.124  void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
  47.125 -                   int xch, uint32_t dom, unsigned long *parray,
  47.126 +                   int xch, uint32_t dom, xen_pfn_t *parray,
  47.127                     unsigned long vstart)
  47.128  {
  47.129      char *va;
  47.130 @@ -430,6 +431,29 @@ int xc_version(int xc_handle, int cmd, v
  47.131      return rc;
  47.132  }
  47.133  
  47.134 +unsigned long xc_make_page_below_4G(
  47.135 +    int xc_handle, uint32_t domid, unsigned long mfn)
  47.136 +{
  47.137 +    xen_pfn_t old_mfn = mfn;
  47.138 +    xen_pfn_t new_mfn;
  47.139 +
  47.140 +    if ( xc_domain_memory_decrease_reservation(
  47.141 +        xc_handle, domid, 1, 0, &old_mfn) != 0 )
  47.142 +    {
  47.143 +        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
  47.144 +        return 0;
  47.145 +    }
  47.146 +
  47.147 +    if ( xc_domain_memory_increase_reservation(
  47.148 +        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
  47.149 +    {
  47.150 +        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
  47.151 +        return 0;
  47.152 +    }
  47.153 +
  47.154 +    return new_mfn;
  47.155 +}
  47.156 +
  47.157  /*
  47.158   * Local variables:
  47.159   * mode: C
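
xc_make_page_below_4G() trades one frame for a replacement allocated with address_bits=32, i.e. machine-addressable below 4GB, which is what a PAE guest's level-3 pagetable needs when the kernel lacks extended-CR3 support. A hedged usage sketch against the function as added above; note that on failure the original frame may already have been returned to Xen:

    #include <stdio.h>
    #include <stdint.h>
    #include "xenctrl.h"

    /* Hypothetical wrapper, for illustration. */
    static unsigned long relocate_below_4g(int xc_handle, uint32_t domid,
                                           unsigned long mfn)
    {
        unsigned long new_mfn = xc_make_page_below_4G(xc_handle, domid, mfn);
        if (new_mfn == 0)
            fprintf(stderr, "mfn %lx: relocation below 4GB failed\n", mfn);
        return new_mfn;   /* 0 on failure, replacement frame otherwise */
    }
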
    48.1 --- a/tools/libxc/xc_ptrace.c	Wed Jun 07 11:03:15 2006 +0100
    48.2 +++ b/tools/libxc/xc_ptrace.c	Wed Jun 07 11:03:51 2006 +0100
    48.3 @@ -190,7 +190,8 @@ map_domain_va_32(
    48.4      static void *v[MAX_VIRT_CPUS];
    48.5  
    48.6      l2 = xc_map_foreign_range(
    48.7 -         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
    48.8 +         xc_handle, current_domid, PAGE_SIZE, PROT_READ,
    48.9 +         xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
   48.10      if ( l2 == NULL )
   48.11          return NULL;
   48.12  
   48.13 @@ -230,7 +231,8 @@ map_domain_va_pae(
   48.14      static void *v[MAX_VIRT_CPUS];
   48.15  
   48.16      l3 = xc_map_foreign_range(
   48.17 -        xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
   48.18 +        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
   48.19 +        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
   48.20      if ( l3 == NULL )
   48.21          return NULL;
   48.22  
   48.23 @@ -282,8 +284,9 @@ map_domain_va_64(
   48.24      if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
   48.25          return map_domain_va_32(xc_handle, cpu, guest_va, perm);
   48.26  
   48.27 -    l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE,
   48.28 -            PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
   48.29 +    l4 = xc_map_foreign_range(
   48.30 +        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
   48.31 +        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
   48.32      if ( l4 == NULL )
   48.33          return NULL;
   48.34  
    49.1 --- a/tools/libxc/xc_ptrace_core.c	Wed Jun 07 11:03:15 2006 +0100
    49.2 +++ b/tools/libxc/xc_ptrace_core.c	Wed Jun 07 11:03:51 2006 +0100
    49.3 @@ -12,8 +12,8 @@
    49.4  static long   nr_pages = 0;
    49.5  static unsigned long  *p2m_array = NULL;
    49.6  static unsigned long  *m2p_array = NULL;
    49.7 -static unsigned long            pages_offset;
    49.8 -static unsigned long            cr3[MAX_VIRT_CPUS];
    49.9 +static unsigned long   pages_offset;
   49.10 +static unsigned long   cr3[MAX_VIRT_CPUS];
   49.11  
   49.12  /* --------------------- */
   49.13  
   49.14 @@ -47,7 +47,7 @@ map_domain_va_core(unsigned long domfd, 
   49.15              munmap(cr3_virt[cpu], PAGE_SIZE);
   49.16          v = mmap(
   49.17              NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
   49.18 -            map_mtop_offset(cr3_phys[cpu]));
   49.19 +            map_mtop_offset(xen_cr3_to_pfn(cr3_phys[cpu])));
   49.20          if (v == MAP_FAILED)
   49.21          {
   49.22              perror("mmap failed");
   49.23 @@ -127,14 +127,15 @@ xc_waitdomain_core(
   49.24              sizeof(vcpu_guest_context_t)*nr_vcpus)
   49.25              return -1;
   49.26  
   49.27 -        for (i = 0; i < nr_vcpus; i++) {
   49.28 +        for (i = 0; i < nr_vcpus; i++)
   49.29              cr3[i] = ctxt[i].ctrlreg[3];
   49.30 -        }
   49.31 +
   49.32          if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL)
   49.33          {
   49.34              printf("Could not allocate p2m_array\n");
   49.35              return -1;
   49.36          }
   49.37 +
   49.38          if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
   49.39              sizeof(unsigned long)*nr_pages)
   49.40              return -1;
   49.41 @@ -146,10 +147,8 @@ xc_waitdomain_core(
   49.42          }
   49.43          bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
   49.44  
   49.45 -        for (i = 0; i < nr_pages; i++) {
   49.46 +        for (i = 0; i < nr_pages; i++)
   49.47              m2p_array[p2m_array[i]] = i;
   49.48 -        }
   49.49 -
   49.50      }
   49.51      return 0;
   49.52  }
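
The core-file reader keeps both directions of the translation: p2m_array maps a pseudo-physical frame to its machine frame, and m2p_array inverts it so a machine value such as a saved cr3 can be located in the dump. A toy, runnable version of the inversion loop above:

    #include <stdio.h>

    int main(void)
    {
        unsigned long p2m[4] = { 7, 3, 9, 1 };  /* pfn -> mfn, toy values */
        unsigned long m2p[16] = { 0 };          /* mfn -> pfn, sized to max mfn */
        long i;

        for (i = 0; i < 4; i++)
            m2p[p2m[i]] = i;

        printf("mfn 9 belongs to pfn %lu\n", m2p[9]);   /* prints 2 */
        return 0;
    }
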
    50.1 --- a/tools/libxc/xenctrl.h	Wed Jun 07 11:03:15 2006 +0100
    50.2 +++ b/tools/libxc/xenctrl.h	Wed Jun 07 11:03:51 2006 +0100
    50.3 @@ -415,26 +415,26 @@ int xc_domain_memory_increase_reservatio
    50.4                                            unsigned long nr_extents,
    50.5                                            unsigned int extent_order,
    50.6                                            unsigned int address_bits,
    50.7 -                                          unsigned long *extent_start);
    50.8 +                                          xen_pfn_t *extent_start);
    50.9  
   50.10  int xc_domain_memory_decrease_reservation(int xc_handle,
   50.11                                            uint32_t domid,
   50.12                                            unsigned long nr_extents,
   50.13                                            unsigned int extent_order,
   50.14 -                                          unsigned long *extent_start);
   50.15 +                                          xen_pfn_t *extent_start);
   50.16  
   50.17  int xc_domain_memory_populate_physmap(int xc_handle,
   50.18                                        uint32_t domid,
   50.19                                        unsigned long nr_extents,
   50.20                                        unsigned int extent_order,
   50.21                                        unsigned int address_bits,
   50.22 -                                      unsigned long *extent_start);
   50.23 +                                      xen_pfn_t *extent_start);
   50.24  
   50.25  int xc_domain_translate_gpfn_list(int xc_handle,
   50.26                                    uint32_t domid,
   50.27                                    unsigned long nr_gpfns,
   50.28 -                                  unsigned long *gpfn_list,
   50.29 -                                  unsigned long *mfn_list);
   50.30 +                                  xen_pfn_t *gpfn_list,
   50.31 +                                  xen_pfn_t *mfn_list);
   50.32  
   50.33  int xc_domain_ioport_permission(int xc_handle,
   50.34                                  uint32_t domid,
   50.35 @@ -453,6 +453,9 @@ int xc_domain_iomem_permission(int xc_ha
   50.36                                 unsigned long nr_mfns,
   50.37                                 uint8_t allow_access);
   50.38  
   50.39 +unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
   50.40 +                                    unsigned long mfn);
   50.41 +
   50.42  typedef dom0_perfc_desc_t xc_perfc_desc_t;
   50.43  /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
   50.44  int xc_perfc_control(int xc_handle,
   50.45 @@ -484,7 +487,7 @@ void *xc_map_foreign_range(int xc_handle
   50.46                              unsigned long mfn );
   50.47  
   50.48  void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
   50.49 -                           unsigned long *arr, int num );
   50.50 +                           xen_pfn_t *arr, int num );
   50.51  
   50.52  /**
   50.53   * Translates a virtual address in the context of a given domain and
   50.54 @@ -499,11 +502,11 @@ void *xc_map_foreign_batch(int xc_handle
   50.55  unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
   50.56                                             int vcpu, unsigned long long virt);
   50.57  
   50.58 -int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf,
   50.59 +int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
   50.60                      unsigned long max_pfns);
   50.61  
   50.62  int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
   50.63 -                         unsigned long *pfn_buf,
   50.64 +                         xen_pfn_t *pfn_buf,
   50.65                           unsigned int start_page, unsigned int nr_pages);
   50.66  
   50.67  int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
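
Callers of the retyped interfaces have to size their buffers in xen_pfn_t units as well; mixing in sizeof(unsigned long) silently corrupts the list on any port where the two widths diverge. A hedged usage sketch for xc_get_pfn_list(), assuming it returns the number of frames copied on success:

    #include <stdio.h>
    #include <stdlib.h>
    #include "xenctrl.h"

    static int dump_pfns(int xc_handle, uint32_t domid, unsigned long max_pfns)
    {
        /* sizeof(xen_pfn_t), not sizeof(unsigned long) */
        xen_pfn_t *buf = calloc(max_pfns, sizeof(xen_pfn_t));
        int n, i;

        if (buf == NULL)
            return -1;
        n = xc_get_pfn_list(xc_handle, domid, buf, max_pfns);
        for (i = 0; i < n; i++)
            printf("pfn[%d] = %lx\n", i, (unsigned long)buf[i]);
        free(buf);
        return n;
    }
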
    51.1 --- a/tools/libxc/xg_private.h	Wed Jun 07 11:03:15 2006 +0100
    51.2 +++ b/tools/libxc/xg_private.h	Wed Jun 07 11:03:51 2006 +0100
    51.3 @@ -156,6 +156,9 @@ struct domain_setup_info
    51.4  
    51.5      unsigned long elf_paddr_offset;
    51.6  
    51.7 +#define PAEKERN_no           0
    51.8 +#define PAEKERN_yes          1
    51.9 +#define PAEKERN_extended_cr3 2
   51.10      unsigned int  pae_kernel;
   51.11  
   51.12      unsigned int  load_symtab;
   51.13 @@ -170,7 +173,7 @@ typedef int (*parseimagefunc)(const char
   51.14                                struct domain_setup_info *dsi);
   51.15  typedef int (*loadimagefunc)(const char *image, unsigned long image_size,
   51.16                               int xch,
   51.17 -                             uint32_t dom, unsigned long *parray,
   51.18 +                             uint32_t dom, xen_pfn_t *parray,
   51.19                               struct domain_setup_info *dsi);
   51.20  
   51.21  struct load_funcs
   51.22 @@ -198,7 +201,7 @@ int xc_copy_to_domain_page(int xc_handle
   51.23  unsigned long xc_get_filesz(int fd);
   51.24  
   51.25  void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
   51.26 -                   int xch, uint32_t dom, unsigned long *parray,
   51.27 +                   int xch, uint32_t dom, xen_pfn_t *parray,
   51.28                     unsigned long vstart);
   51.29  
   51.30  int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
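
With PAEKERN_no/yes/extended_cr3, pae_kernel stops being a boolean: a builder can now tell a PAE kernel that tolerates a high level-3 table from one that still needs it below 4GB. A hedged sketch of the distinction a consumer would draw (the helper name is hypothetical):

    #include "xg_private.h"   /* PAEKERN_*, struct domain_setup_info */

    /* Hypothetical helper: must this guest's L3 pagetable sit below 4GB? */
    static int l3_must_be_below_4g(const struct domain_setup_info *dsi)
    {
        /* Only a PAE kernel without extended-CR3 support imposes the
         * limit; non-PAE and extended-CR3 kernels do not. */
        return dsi->pae_kernel == PAEKERN_yes;
    }
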
    52.1 --- a/tools/libxc/xg_save_restore.h	Wed Jun 07 11:03:15 2006 +0100
    52.2 +++ b/tools/libxc/xg_save_restore.h	Wed Jun 07 11:03:51 2006 +0100
    52.3 @@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand
    52.4  */
    52.5  #define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE
    52.6  #define M2P_CHUNK_SIZE  (1 << M2P_SHIFT)
    52.7 -#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
    52.8 +#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
    52.9  #define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
   52.10  
   52.11  /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
   52.12 -#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
   52.13 +#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
   52.14  
   52.15 -/* Number of unsigned longs in a page */
   52.16 -#define ulpp            (PAGE_SIZE/sizeof(unsigned long))
   52.17 +/* Number of xen_pfn_t in a page */
   52.18 +#define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
   52.19  
   52.20  /* Number of entries in the pfn_to_mfn_frame_list */
   52.21 -#define P2M_FL_ENTRIES  (((max_pfn)+ulpp-1)/ulpp)
   52.22 +#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
   52.23  
   52.24  /* Size in bytes of the pfn_to_mfn_frame_list     */
   52.25  #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
   52.26  
   52.27  /* Number of entries in the pfn_to_mfn_frame_list_list */
   52.28 -#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp))
   52.29 +#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
   52.30  
   52.31  /* Current guests allow 8MB 'slack' in their P2M */
   52.32  #define NR_SLACK_ENTRIES   ((8 * 1024 * 1024) / PAGE_SIZE)
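
The rename from ulpp to fpp tracks the retyping: the P2M frame lists hold xen_pfn_t entries, so the per-page entry count follows sizeof(xen_pfn_t), not sizeof(unsigned long). A worked, runnable example of the resulting macro arithmetic, assuming 4KB pages and an 8-byte xen_pfn_t:

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096, sz = 8;   /* assumed sizeof(xen_pfn_t) */
        unsigned long fpp = page_size / sz;       /* 512 entries per frame */
        unsigned long max_pfn = 1UL << 20;        /* a 4GB guest at 4KB pages */

        unsigned long fl  = (max_pfn + fpp - 1) / fpp;            /* P2M_FL_ENTRIES  */
        unsigned long fll = (max_pfn + fpp*fpp - 1) / (fpp*fpp);  /* P2M_FLL_ENTRIES */

        printf("fpp=%lu fl=%lu fll=%lu\n", fpp, fl, fll);
        /* prints: fpp=512 fl=2048 fll=4 */
        return 0;
    }
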
    53.1 --- a/tools/python/xen/util/security.py	Wed Jun 07 11:03:15 2006 +0100
    53.2 +++ b/tools/python/xen/util/security.py	Wed Jun 07 11:03:51 2006 +0100
    53.3 @@ -426,6 +426,15 @@ def get_decision(arg1, arg2):
    53.4              err("Argument type not supported.")
    53.5          ssidref = label2ssidref(arg2[2][1], arg2[1][1])
    53.6          arg2 = ['ssidref', str(ssidref)]
    53.7 +
    53.8 +    # accept only int or string types for domid and ssidref
    53.9 +    if isinstance(arg1[1], int):
   53.10 +        arg1[1] = str(arg1[1])
   53.11 +    if isinstance(arg2[1], int):
   53.12 +        arg2[1] = str(arg2[1])
   53.13 +    if not isinstance(arg1[1], str) or not isinstance(arg2[1], str):
   53.14 +        err("Invalid id or ssidref type, string or int required")
   53.15 +
   53.16      try:
   53.17          decision = acm.getdecision(arg1[0], arg1[1], arg2[0], arg2[1])
   53.18      except:
    54.1 --- a/tools/tests/test_x86_emulator.c	Wed Jun 07 11:03:15 2006 +0100
    54.2 +++ b/tools/tests/test_x86_emulator.c	Wed Jun 07 11:03:51 2006 +0100
    54.3 @@ -13,6 +13,7 @@ typedef int32_t            s32;
    54.4  typedef int64_t            s64;
    54.5  #include <public/xen.h>
    54.6  #include <asm-x86/x86_emulate.h>
    54.7 +#include <sys/mman.h>
    54.8  
    54.9  static int read_any(
   54.10      unsigned long addr,
   54.11 @@ -85,23 +86,30 @@ int main(int argc, char **argv)
   54.12      struct x86_emulate_ctxt ctxt;
   54.13      struct cpu_user_regs regs;
   54.14      char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
   54.15 -    unsigned int res = 0x7FFFFFFF;
   54.16 -    u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
   54.17 +    unsigned int *res;
   54.18      int rc;
   54.19  
   54.20      ctxt.regs = &regs;
   54.21      ctxt.mode = X86EMUL_MODE_PROT32;
   54.22  
   54.23 +    res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE,
   54.24 +               MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
   54.25 +    if ( res == MAP_FAILED )
   54.26 +    {
   54.27 +        fprintf(stderr, "mmap to low address failed\n");
   54.28 +        exit(1);
   54.29 +    }
   54.30 +
   54.31      printf("%-40s", "Testing addl %%ecx,(%%eax)...");
   54.32      instr[0] = 0x01; instr[1] = 0x08;
   54.33      regs.eflags = 0x200;
   54.34      regs.eip    = (unsigned long)&instr[0];
   54.35      regs.ecx    = 0x12345678;
   54.36 -    ctxt.cr2    = (unsigned long)&res;
   54.37 -    res         = 0x7FFFFFFF;
   54.38 +    ctxt.cr2    = (unsigned long)res;
   54.39 +    *res        = 0x7FFFFFFF;
   54.40      rc = x86_emulate_memop(&ctxt, &emulops);
   54.41      if ( (rc != 0) || 
   54.42 -         (res != 0x92345677) || 
   54.43 +         (*res != 0x92345677) || 
   54.44           (regs.eflags != 0xa94) ||
   54.45           (regs.eip != (unsigned long)&instr[2]) )
   54.46          goto fail;
   54.47 @@ -116,25 +124,39 @@ int main(int argc, char **argv)
   54.48  #else
   54.49      regs.ecx    = 0x12345678UL;
   54.50  #endif
   54.51 -    ctxt.cr2    = (unsigned long)&res;
   54.52 +    ctxt.cr2    = (unsigned long)res;
   54.53      rc = x86_emulate_memop(&ctxt, &emulops);
   54.54      if ( (rc != 0) || 
   54.55 -         (res != 0x92345677) || 
   54.56 +         (*res != 0x92345677) || 
   54.57           (regs.ecx != 0x8000000FUL) ||
   54.58           (regs.eip != (unsigned long)&instr[2]) )
   54.59          goto fail;
   54.60      printf("okay\n");
   54.61  
   54.62 +    printf("%-40s", "Testing movl (%%eax),%%ecx...");
   54.63 +    instr[0] = 0x8b; instr[1] = 0x08;
   54.64 +    regs.eflags = 0x200;
   54.65 +    regs.eip    = (unsigned long)&instr[0];
   54.66 +    regs.ecx    = ~0UL;
   54.67 +    ctxt.cr2    = (unsigned long)res;
   54.68 +    rc = x86_emulate_memop(&ctxt, &emulops);
   54.69 +    if ( (rc != 0) || 
   54.70 +         (*res != 0x92345677) || 
   54.71 +         (regs.ecx != 0x92345677UL) ||
   54.72 +         (regs.eip != (unsigned long)&instr[2]) )
   54.73 +        goto fail;
   54.74 +    printf("okay\n");
   54.75 +
   54.76      printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
   54.77      instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
   54.78      regs.eflags = 0x200;
   54.79      regs.eip    = (unsigned long)&instr[0];
   54.80      regs.eax    = 0x92345677UL;
   54.81      regs.ecx    = 0xAA;
   54.82 -    ctxt.cr2    = (unsigned long)&res;
   54.83 +    ctxt.cr2    = (unsigned long)res;
   54.84      rc = x86_emulate_memop(&ctxt, &emulops);
   54.85      if ( (rc != 0) || 
   54.86 -         (res != 0x923456AA) || 
   54.87 +         (*res != 0x923456AA) || 
   54.88           (regs.eflags != 0x244) ||
   54.89           (regs.eax != 0x92345677UL) ||
   54.90           (regs.eip != (unsigned long)&instr[4]) )
   54.91 @@ -147,10 +169,10 @@ int main(int argc, char **argv)
   54.92      regs.eip    = (unsigned long)&instr[0];
   54.93      regs.eax    = 0xAABBCC77UL;
   54.94      regs.ecx    = 0xFF;
   54.95 -    ctxt.cr2    = (unsigned long)&res;
   54.96 +    ctxt.cr2    = (unsigned long)res;
   54.97      rc = x86_emulate_memop(&ctxt, &emulops);
   54.98      if ( (rc != 0) || 
   54.99 -         (res != 0x923456AA) || 
  54.100 +         (*res != 0x923456AA) || 
  54.101           ((regs.eflags&0x240) != 0x200) ||
  54.102           (regs.eax != 0xAABBCCAA) ||
  54.103           (regs.ecx != 0xFF) ||
  54.104 @@ -163,10 +185,10 @@ int main(int argc, char **argv)
  54.105      regs.eflags = 0x200;
  54.106      regs.eip    = (unsigned long)&instr[0];
  54.107      regs.ecx    = 0x12345678;
  54.108 -    ctxt.cr2    = (unsigned long)&res;
  54.109 +    ctxt.cr2    = (unsigned long)res;
  54.110      rc = x86_emulate_memop(&ctxt, &emulops);
  54.111      if ( (rc != 0) || 
  54.112 -         (res != 0x12345678) || 
  54.113 +         (*res != 0x12345678) || 
  54.114           (regs.eflags != 0x200) ||
  54.115           (regs.ecx != 0x923456AA) ||
  54.116           (regs.eip != (unsigned long)&instr[2]) )
  54.117 @@ -176,14 +198,14 @@ int main(int argc, char **argv)
  54.118      printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
  54.119      instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
  54.120      regs.eflags = 0x200;
  54.121 -    res         = 0x923456AA;
  54.122 +    *res        = 0x923456AA;
  54.123      regs.eip    = (unsigned long)&instr[0];
  54.124      regs.eax    = 0x923456AAUL;
  54.125      regs.ecx    = 0xDDEEFF00L;
  54.126 -    ctxt.cr2    = (unsigned long)&res;
  54.127 +    ctxt.cr2    = (unsigned long)res;
  54.128      rc = x86_emulate_memop(&ctxt, &emulops);
  54.129      if ( (rc != 0) || 
  54.130 -         (res != 0xDDEEFF00) || 
  54.131 +         (*res != 0xDDEEFF00) || 
  54.132           (regs.eflags != 0x244) ||
  54.133           (regs.eax != 0x923456AAUL) ||
  54.134           (regs.eip != (unsigned long)&instr[4]) )
  54.135 @@ -192,54 +214,57 @@ int main(int argc, char **argv)
  54.136  
  54.137      printf("%-40s", "Testing rep movsw...");
  54.138      instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
  54.139 -    res         = 0x22334455;
  54.140 +    *res        = 0x22334455;
  54.141      regs.eflags = 0x200;
  54.142      regs.ecx    = 23;
  54.143      regs.eip    = (unsigned long)&instr[0];
  54.144 -    regs.esi    = (unsigned long)&res + 0;
  54.145 -    regs.edi    = (unsigned long)&res + 2;
  54.146 +    regs.esi    = (unsigned long)res + 0;
  54.147 +    regs.edi    = (unsigned long)res + 2;
  54.148      regs.error_code = 0; /* read fault */
  54.149      ctxt.cr2    = regs.esi;
  54.150      rc = x86_emulate_memop(&ctxt, &emulops);
  54.151      if ( (rc != 0) || 
  54.152 -         (res != 0x44554455) ||
  54.153 +         (*res != 0x44554455) ||
  54.154           (regs.eflags != 0x200) ||
  54.155           (regs.ecx != 22) || 
  54.156 -         (regs.esi != ((unsigned long)&res + 2)) ||
  54.157 -         (regs.edi != ((unsigned long)&res + 4)) ||
  54.158 +         (regs.esi != ((unsigned long)res + 2)) ||
  54.159 +         (regs.edi != ((unsigned long)res + 4)) ||
  54.160           (regs.eip != (unsigned long)&instr[0]) )
  54.161          goto fail;
  54.162      printf("okay\n");
  54.163  
  54.164      printf("%-40s", "Testing btrl $0x1,(%edi)...");
  54.165      instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
  54.166 -    res         = 0x2233445F;
  54.167 +    *res        = 0x2233445F;
  54.168      regs.eflags = 0x200;
  54.169      regs.eip    = (unsigned long)&instr[0];
  54.170 -    regs.edi    = (unsigned long)&res;
  54.171 +    regs.edi    = (unsigned long)res;
  54.172      ctxt.cr2    = regs.edi;
  54.173      rc = x86_emulate_memop(&ctxt, &emulops);
  54.174      if ( (rc != 0) || 
  54.175 -         (res != 0x2233445D) ||
  54.176 +         (*res != 0x2233445D) ||
  54.177           ((regs.eflags&0x201) != 0x201) ||
  54.178           (regs.eip != (unsigned long)&instr[4]) )
  54.179          goto fail;
  54.180      printf("okay\n");
  54.181  
  54.182 +    res[0] = 0x12345678;
  54.183 +    res[1] = 0x87654321;
  54.184 +
  54.185      printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]...");
  54.186      instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
  54.187      regs.eflags = 0x200;
  54.188 -    regs.eax    = cmpxchg8b_res[0];
  54.189 -    regs.edx    = cmpxchg8b_res[1];
  54.190 +    regs.eax    = res[0];
  54.191 +    regs.edx    = res[1];
  54.192      regs.ebx    = 0x9999AAAA;
  54.193      regs.ecx    = 0xCCCCFFFF;
  54.194      regs.eip    = (unsigned long)&instr[0];
  54.195 -    regs.edi    = (unsigned long)cmpxchg8b_res;
  54.196 +    regs.edi    = (unsigned long)res;
  54.197      ctxt.cr2    = regs.edi;
  54.198      rc = x86_emulate_memop(&ctxt, &emulops);
  54.199      if ( (rc != 0) || 
  54.200 -         (cmpxchg8b_res[0] != 0x9999AAAA) ||
  54.201 -         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
  54.202 +         (res[0] != 0x9999AAAA) ||
  54.203 +         (res[1] != 0xCCCCFFFF) ||
  54.204           ((regs.eflags&0x240) != 0x240) ||
  54.205           (regs.eip != (unsigned long)&instr[3]) )
  54.206          goto fail;
  54.207 @@ -248,12 +273,12 @@ int main(int argc, char **argv)
  54.208      printf("%-40s", "Testing cmpxchg8b (%edi) [failing]...");
  54.209      instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
  54.210      regs.eip    = (unsigned long)&instr[0];
  54.211 -    regs.edi    = (unsigned long)cmpxchg8b_res;
  54.212 +    regs.edi    = (unsigned long)res;
  54.213      ctxt.cr2    = regs.edi;
  54.214      rc = x86_emulate_memop(&ctxt, &emulops);
  54.215      if ( (rc != 0) || 
  54.216 -         (cmpxchg8b_res[0] != 0x9999AAAA) ||
  54.217 -         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
  54.218 +         (res[0] != 0x9999AAAA) ||
  54.219 +         (res[1] != 0xCCCCFFFF) ||
  54.220           (regs.eax != 0x9999AAAA) ||
  54.221           (regs.edx != 0xCCCCFFFF) ||
  54.222           ((regs.eflags&0x240) != 0x200) ||
  54.223 @@ -265,11 +290,11 @@ int main(int argc, char **argv)
  54.224      instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
  54.225      regs.eip    = (unsigned long)&instr[0];
  54.226      regs.ecx    = 0x12345678;
  54.227 -    ctxt.cr2    = (unsigned long)&res;
  54.228 -    res         = 0x82;
  54.229 +    ctxt.cr2    = (unsigned long)res;
  54.230 +    *res        = 0x82;
  54.231      rc = x86_emulate_memop(&ctxt, &emulops);
  54.232      if ( (rc != 0) ||
  54.233 -         (res != 0x82) ||
  54.234 +         (*res != 0x82) ||
  54.235           (regs.ecx != 0xFFFFFF82) ||
  54.236           ((regs.eflags&0x240) != 0x200) ||
  54.237           (regs.eip != (unsigned long)&instr[3]) )
  54.238 @@ -280,11 +305,11 @@ int main(int argc, char **argv)
  54.239      instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
  54.240      regs.eip    = (unsigned long)&instr[0];
  54.241      regs.ecx    = 0x12345678;
  54.242 -    ctxt.cr2    = (unsigned long)&res;
  54.243 -    res         = 0x1234aa82;
  54.244 +    ctxt.cr2    = (unsigned long)res;
  54.245 +    *res        = 0x1234aa82;
  54.246      rc = x86_emulate_memop(&ctxt, &emulops);
  54.247      if ( (rc != 0) ||
  54.248 -         (res != 0x1234aa82) ||
  54.249 +         (*res != 0x1234aa82) ||
  54.250           (regs.ecx != 0xaa82) ||
  54.251           ((regs.eflags&0x240) != 0x200) ||
  54.252           (regs.eip != (unsigned long)&instr[3]) )
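
Moving the scratch word from a stack variable to a fixed low mapping keeps the emulated target at an address that fits the test's 32-bit addressing assumptions even in a 64-bit build, where &res on the stack can exceed 4GB. A standalone sketch of the same allocation pattern (address choice illustrative; -1 is the conventional fd for anonymous mappings):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    int main(void)
    {
        unsigned int *res = mmap((void *)0x100000, 0x1000,
                                 PROT_READ | PROT_WRITE,
                                 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                 -1, 0);
        if (res == MAP_FAILED) {
            perror("mmap of low scratch page");
            exit(1);
        }
        *res = 0x7FFFFFFF;
        printf("scratch word at %p = %x\n", (void *)res, *res);
        return 0;
    }
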
    55.1 --- a/xen/arch/ia64/linux-xen/smpboot.c	Wed Jun 07 11:03:15 2006 +0100
    55.2 +++ b/xen/arch/ia64/linux-xen/smpboot.c	Wed Jun 07 11:03:51 2006 +0100
    55.3 @@ -62,6 +62,7 @@
    55.4  #include <asm/unistd.h>
    55.5  
    55.6  #ifdef XEN
    55.7 +#include <xen/domain.h>
    55.8  #include <asm/hw_irq.h>
    55.9  int ht_per_core = 1;
   55.10  #ifndef CONFIG_SMP
   55.11 @@ -487,7 +488,7 @@ do_rest:
   55.12  #else
   55.13  	struct vcpu *v;
   55.14  
   55.15 -	v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
   55.16 +	v = alloc_idle_vcpu(cpu);
   55.17  	BUG_ON(v == NULL);
   55.18  
   55.19  	//printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
    56.1 --- a/xen/arch/ia64/xen/domain.c	Wed Jun 07 11:03:15 2006 +0100
    56.2 +++ b/xen/arch/ia64/xen/domain.c	Wed Jun 07 11:03:51 2006 +0100
    56.3 @@ -42,6 +42,7 @@
    56.4  
    56.5  #include <asm/vcpu.h>   /* for function declarations */
    56.6  #include <public/arch-ia64.h>
    56.7 +#include <xen/domain.h>
    56.8  #include <asm/vmx.h>
    56.9  #include <asm/vmx_vcpu.h>
   56.10  #include <asm/vmx_vpd.h>
   56.11 @@ -92,26 +93,16 @@ alloc_dom_xen_and_dom_io(void)
   56.12       * Any Xen-heap pages that we will allow to be mapped will have
   56.13       * their domain field set to dom_xen.
   56.14       */
   56.15 -    dom_xen = alloc_domain();
   56.16 +    dom_xen = alloc_domain(DOMID_XEN);
   56.17      BUG_ON(dom_xen == NULL);
   56.18 -    spin_lock_init(&dom_xen->page_alloc_lock);
   56.19 -    INIT_LIST_HEAD(&dom_xen->page_list);
   56.20 -    INIT_LIST_HEAD(&dom_xen->xenpage_list);
   56.21 -    atomic_set(&dom_xen->refcnt, 1);
   56.22 -    dom_xen->domain_id = DOMID_XEN;
   56.23  
   56.24      /*
   56.25       * Initialise our DOMID_IO domain.
   56.26       * This domain owns I/O pages that are within the range of the page_info
   56.27       * array. Mappings occur at the priv of the caller.
   56.28       */
   56.29 -    dom_io = alloc_domain();
   56.30 +    dom_io = alloc_domain(DOMID_IO);
   56.31      BUG_ON(dom_io == NULL);
   56.32 -    spin_lock_init(&dom_io->page_alloc_lock);
   56.33 -    INIT_LIST_HEAD(&dom_io->page_list);
   56.34 -    INIT_LIST_HEAD(&dom_io->xenpage_list);
   56.35 -    atomic_set(&dom_io->refcnt, 1);
   56.36 -    dom_io->domain_id = DOMID_IO;
   56.37  }
   56.38  #endif
   56.39  
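
alloc_domain() now takes the domain ID and absorbs the boilerplate each caller used to open-code; the removed lines show exactly what moved inside it. An illustrative reconstruction from those lines (the real helper, in common code, does more; alloc_domain_struct is an assumed name for the raw allocation):

    struct domain *alloc_domain(domid_t domid)
    {
        struct domain *d;

        if ((d = alloc_domain_struct()) == NULL)   /* assumed allocator */
            return NULL;

        memset(d, 0, sizeof(*d));
        d->domain_id = domid;
        atomic_set(&d->refcnt, 1);
        spin_lock_init(&d->page_alloc_lock);
        INIT_LIST_HEAD(&d->page_list);
        INIT_LIST_HEAD(&d->xenpage_list);
        return d;
    }
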
    57.1 --- a/xen/arch/ia64/xen/xensetup.c	Wed Jun 07 11:03:15 2006 +0100
    57.2 +++ b/xen/arch/ia64/xen/xensetup.c	Wed Jun 07 11:03:51 2006 +0100
    57.3 @@ -35,8 +35,6 @@ unsigned long xenheap_phys_end, total_pa
    57.4  char saved_command_line[COMMAND_LINE_SIZE];
    57.5  char dom0_command_line[COMMAND_LINE_SIZE];
    57.6  
    57.7 -struct vcpu *idle_vcpu[NR_CPUS];
    57.8 -
    57.9  cpumask_t cpu_present_map;
   57.10  
   57.11  extern unsigned long domain0_ready;
    58.1 --- a/xen/arch/x86/audit.c	Wed Jun 07 11:03:15 2006 +0100
    58.2 +++ b/xen/arch/x86/audit.c	Wed Jun 07 11:03:51 2006 +0100
    58.3 @@ -432,10 +432,10 @@ int audit_adjust_pgtables(struct domain 
    58.4  
    58.5          for_each_vcpu(d, v)
    58.6          {
    58.7 -            if ( pagetable_get_paddr(v->arch.guest_table) )
    58.8 +            if ( !pagetable_is_null(v->arch.guest_table) )
    58.9                  adjust(mfn_to_page(pagetable_get_pfn(v->arch.guest_table)),
   58.10                         !shadow_mode_refcounts(d));
   58.11 -            if ( pagetable_get_paddr(v->arch.shadow_table) )
   58.12 +            if ( !pagetable_is_null(v->arch.shadow_table) )
   58.13                  adjust(mfn_to_page(pagetable_get_pfn(v->arch.shadow_table)),
   58.14                         0);
   58.15              if ( v->arch.monitor_shadow_ref )
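
pagetable_is_null() replaces the zero-paddr test and, together with the pagetable_null()/pagetable_from_pfn()/pagetable_from_paddr() constructors used throughout this changeset, makes pagetable_t an opaque value built and inspected only through its accessors. An illustrative sketch of the accessor family (field layout assumed; the canonical definitions are in the arch headers):

    typedef struct { unsigned long pfn; } pagetable_t;

    #define pagetable_get_pfn(x)    ((x).pfn)
    #define pagetable_is_null(x)    ((x).pfn == 0)
    #define pagetable_from_pfn(p)   ((pagetable_t) { (p) })
    #define pagetable_null()        pagetable_from_pfn(0)
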
    59.1 --- a/xen/arch/x86/cpu/mtrr/main.c	Wed Jun 07 11:03:15 2006 +0100
    59.2 +++ b/xen/arch/x86/cpu/mtrr/main.c	Wed Jun 07 11:03:51 2006 +0100
    59.3 @@ -43,7 +43,7 @@
    59.4  #include "mtrr.h"
    59.5  
    59.6  /* No blocking mutexes in Xen. Spin instead. */
    59.7 -#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
    59.8 +#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
    59.9  #define down(_m) spin_lock(_m)
   59.10  #define up(_m) spin_unlock(_m)
   59.11  #define lock_cpu_hotplug() ((void)0)
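
DECLARE_MUTEX now expands to DEFINE_SPINLOCK() rather than assignment from SPIN_LOCK_UNLOCKED, keeping the declaration and its initializer inside a single macro; the spin_lock_init() change in svm.c below has the same motivation for the dynamic case. A sketch with assumed primitive shapes:

    #include <stdio.h>

    /* Assumed shapes, for illustration only. */
    typedef struct { volatile int v; } spinlock_t;
    #define SPINLOCK_INIT       { 0 }
    #define DEFINE_SPINLOCK(l)  spinlock_t l = SPINLOCK_INIT

    DEFINE_SPINLOCK(demo_lock);   /* declared and initialized in one step */

    int main(void)
    {
        printf("starts unlocked: %s\n", demo_lock.v == 0 ? "yes" : "no");
        return 0;
    }
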
    60.1 --- a/xen/arch/x86/dom0_ops.c	Wed Jun 07 11:03:15 2006 +0100
    60.2 +++ b/xen/arch/x86/dom0_ops.c	Wed Jun 07 11:03:51 2006 +0100
    60.3 @@ -467,7 +467,7 @@ void arch_getdomaininfo_ctxt(
    60.4      if ( hvm_guest(v) )
    60.5          c->flags |= VGCF_HVM_GUEST;
    60.6  
    60.7 -    c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);
    60.8 +    c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
    60.9  
   60.10      c->vm_assist = v->domain->vm_assist;
   60.11  }
    61.1 --- a/xen/arch/x86/domain.c	Wed Jun 07 11:03:15 2006 +0100
    61.2 +++ b/xen/arch/x86/domain.c	Wed Jun 07 11:03:51 2006 +0100
    61.3 @@ -259,7 +259,7 @@ int arch_set_info_guest(
    61.4      struct vcpu *v, struct vcpu_guest_context *c)
    61.5  {
    61.6      struct domain *d = v->domain;
    61.7 -    unsigned long phys_basetab = INVALID_MFN;
    61.8 +    unsigned long cr3_pfn = INVALID_MFN;
    61.9      int i, rc;
   61.10  
   61.11      if ( !(c->flags & VGCF_HVM_GUEST) )
   61.12 @@ -322,12 +322,8 @@ int arch_set_info_guest(
   61.13  
   61.14      if ( !(c->flags & VGCF_HVM_GUEST) )
   61.15      {
   61.16 -        phys_basetab = c->ctrlreg[3];
   61.17 -        phys_basetab =
   61.18 -            (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
   61.19 -            (phys_basetab & ~PAGE_MASK);
   61.20 -
   61.21 -        v->arch.guest_table = mk_pagetable(phys_basetab);
   61.22 +        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
   61.23 +        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
   61.24      }
   61.25  
   61.26      if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
   61.27 @@ -335,14 +331,14 @@ int arch_set_info_guest(
   61.28  
   61.29      if ( c->flags & VGCF_HVM_GUEST )
   61.30      {
   61.31 -        v->arch.guest_table = mk_pagetable(0);
   61.32 +        v->arch.guest_table = pagetable_null();
   61.33  
   61.34          if ( !hvm_initialize_guest_resources(v) )
   61.35              return -EINVAL;
   61.36      }
   61.37      else if ( shadow_mode_refcounts(d) )
   61.38      {
   61.39 -        if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) )
   61.40 +        if ( !get_page(mfn_to_page(cr3_pfn), d) )
   61.41          {
   61.42              destroy_gdt(v);
   61.43              return -EINVAL;
   61.44 @@ -350,7 +346,7 @@ int arch_set_info_guest(
   61.45      }
   61.46      else
   61.47      {
   61.48 -        if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d,
   61.49 +        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
   61.50                                  PGT_base_page_table) )
   61.51          {
   61.52              destroy_gdt(v);
   61.53 @@ -528,20 +524,29 @@ static void load_segments(struct vcpu *n
   61.54      if ( unlikely(!all_segs_okay) )
   61.55      {
   61.56          struct cpu_user_regs *regs = guest_cpu_user_regs();
   61.57 -        unsigned long   *rsp =
   61.58 +        unsigned long *rsp =
   61.59              (n->arch.flags & TF_kernel_mode) ?
   61.60              (unsigned long *)regs->rsp :
   61.61              (unsigned long *)nctxt->kernel_sp;
   61.62 +        unsigned long cs_and_mask, rflags;
   61.63  
   61.64          if ( !(n->arch.flags & TF_kernel_mode) )
   61.65              toggle_guest_mode(n);
   61.66          else
   61.67              regs->cs &= ~3;
   61.68  
   61.69 +        /* CS longword also contains full evtchn_upcall_mask. */
   61.70 +        cs_and_mask = (unsigned long)regs->cs |
   61.71 +            ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32);
   61.72 +
   61.73 +        /* Fold upcall mask into RFLAGS.IF. */
   61.74 +        rflags  = regs->rflags & ~X86_EFLAGS_IF;
   61.75 +        rflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
   61.76 +
   61.77          if ( put_user(regs->ss,            rsp- 1) |
   61.78               put_user(regs->rsp,           rsp- 2) |
   61.79 -             put_user(regs->rflags,        rsp- 3) |
   61.80 -             put_user(regs->cs,            rsp- 4) |
   61.81 +             put_user(rflags,              rsp- 3) |
   61.82 +             put_user(cs_and_mask,         rsp- 4) |
   61.83               put_user(regs->rip,           rsp- 5) |
   61.84               put_user(nctxt->user_regs.gs, rsp- 6) |
   61.85               put_user(nctxt->user_regs.fs, rsp- 7) |
   61.86 @@ -554,6 +559,10 @@ static void load_segments(struct vcpu *n
   61.87              domain_crash(n->domain);
   61.88          }
   61.89  
   61.90 +        if ( test_bit(_VGCF_failsafe_disables_events,
   61.91 +                      &n->arch.guest_context.flags) )
   61.92 +            n->vcpu_info->evtchn_upcall_mask = 1;
   61.93 +
   61.94          regs->entry_vector  = TRAP_syscall;
   61.95          regs->rflags       &= 0xFFFCBEFFUL;
   61.96          regs->ss            = __GUEST_SS;
   61.97 @@ -935,7 +944,7 @@ void domain_relinquish_resources(struct 
   61.98                  put_page_type(mfn_to_page(pfn));
   61.99              put_page(mfn_to_page(pfn));
  61.100  
  61.101 -            v->arch.guest_table = mk_pagetable(0);
  61.102 +            v->arch.guest_table = pagetable_null();
  61.103          }
  61.104  
  61.105          if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
  61.106 @@ -944,7 +953,7 @@ void domain_relinquish_resources(struct 
  61.107                  put_page_type(mfn_to_page(pfn));
  61.108              put_page(mfn_to_page(pfn));
  61.109  
  61.110 -            v->arch.guest_table_user = mk_pagetable(0);
  61.111 +            v->arch.guest_table_user = pagetable_null();
  61.112          }
  61.113      }
  61.114  
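
The 64-bit failsafe frame now encodes the guest's virtual interrupt state: the pushed RFLAGS carries the inverted evtchn_upcall_mask in bit 9 (IF), and the mask itself rides in the otherwise-unused upper half of the CS slot so the guest can restore both atomically on return. A runnable sketch of just the bit packing, assuming an LP64 build as in this x86_64-only hunk:

    #include <stdio.h>

    #define X86_EFLAGS_IF (1UL << 9)

    int main(void)
    {
        unsigned long regs_rflags = 0x246, regs_cs = 0xe033;
        unsigned char upcall_mask = 1;               /* events masked */

        /* CS longword also carries the full upcall mask. */
        unsigned long cs_and_mask = regs_cs |
            ((unsigned long)upcall_mask << 32);

        /* Fold the mask into RFLAGS.IF: IF clear while events are masked. */
        unsigned long rflags = regs_rflags & ~X86_EFLAGS_IF;
        rflags |= (unsigned long)!upcall_mask << 9;

        printf("cs_and_mask=%lx rflags=%lx\n", cs_and_mask, rflags);
        /* prints: cs_and_mask=10000e033 rflags=46 */
        return 0;
    }
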
    62.1 --- a/xen/arch/x86/domain_build.c	Wed Jun 07 11:03:15 2006 +0100
    62.2 +++ b/xen/arch/x86/domain_build.c	Wed Jun 07 11:03:51 2006 +0100
    62.3 @@ -302,6 +302,9 @@ int construct_dom0(struct domain *d,
    62.4          return -EINVAL;
    62.5      }
    62.6  
    62.7 +    if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") )
    62.8 +        set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
    62.9 +
   62.10      if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL )
   62.11      {
   62.12          parse_features(
   62.13 @@ -443,13 +446,13 @@ int construct_dom0(struct domain *d,
   62.14          l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
   62.15              l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
   62.16      }
   62.17 -    v->arch.guest_table = mk_pagetable((unsigned long)l3start);
   62.18 +    v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
   62.19  #else
   62.20      l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
   62.21      memcpy(l2tab, idle_pg_table, PAGE_SIZE);
   62.22      l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   62.23          l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
   62.24 -    v->arch.guest_table = mk_pagetable((unsigned long)l2start);
   62.25 +    v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
   62.26  #endif
   62.27  
   62.28      for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
   62.29 @@ -577,7 +580,7 @@ int construct_dom0(struct domain *d,
   62.30          l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
   62.31      l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
   62.32          l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
   62.33 -    v->arch.guest_table = mk_pagetable(__pa(l4start));
   62.34 +    v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
   62.35  
   62.36      l4tab += l4_table_offset(dsi.v_start);
   62.37      mfn = alloc_spfn;
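
Here dom0's builder enables the assist itself when the capability string shows extended-CR3 support; an ordinary paravirtual kernel requests the same behaviour at boot through the vm_assist hypercall. A hedged sketch of the guest-side call, assuming the usual Linux-side header layout and hypercall wrapper:

    #include <xen/interface/xen.h>  /* VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3 */

    static void enable_extended_cr3(void)
    {
        /* Tell Xen that CR3 writes from this kernel use the extended
         * (frame-number) encoding rather than a raw PAE base address. */
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
    }
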
    63.1 --- a/xen/arch/x86/hvm/svm/svm.c	Wed Jun 07 11:03:15 2006 +0100
    63.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Wed Jun 07 11:03:51 2006 +0100
    63.3 @@ -84,28 +84,26 @@ struct svm_percore_globals svm_globals[N
    63.4  /*
    63.5   * Initializes the POOL of ASID used by the guests per core.
    63.6   */
    63.7 -void asidpool_init( int core )
    63.8 +void asidpool_init(int core)
    63.9  {
   63.10      int i;
   63.11 -    svm_globals[core].ASIDpool.asid_lock = SPIN_LOCK_UNLOCKED;
   63.12 -    spin_lock(&svm_globals[core].ASIDpool.asid_lock);
   63.13 +
   63.14 +    spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
   63.15 +
   63.16      /* Host ASID is always in use */
   63.17      svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
   63.18 -    for( i=1; i<ASID_MAX; i++ )
   63.19 -    {
   63.20 +    for ( i = 1; i < ASID_MAX; i++ )
   63.21         svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
   63.22 -    }
   63.23 -    spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
   63.24  }
   63.25  
   63.26  
   63.27  /* internal function to get the next available ASID */
   63.28 -static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
   63.29 +static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
   63.30  {
   63.31      int i;   
   63.32 -    for( i = 1; i < ASID_MAX; i++ )
   63.33 +    for ( i = 1; i < ASID_MAX; i++ )
   63.34      {
   63.35 -        if( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
   63.36 +        if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
   63.37          {
   63.38              vmcb->guest_asid = i;
   63.39              svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
   63.40 @@ -746,34 +744,34 @@ static void svm_ctxt_switch_to(struct vc
   63.41  
   63.42  void svm_final_setup_guest(struct vcpu *v)
   63.43  {
   63.44 +    struct domain *d = v->domain;
   63.45 +    struct vcpu *vc;
   63.46 +
   63.47      v->arch.schedule_tail    = arch_svm_do_launch;
   63.48      v->arch.ctxt_switch_from = svm_ctxt_switch_from;
   63.49      v->arch.ctxt_switch_to   = svm_ctxt_switch_to;
   63.50  
   63.51 -    if (v == v->domain->vcpu[0]) 
   63.52 -    {
   63.53 -	struct domain *d = v->domain;
   63.54 -	struct vcpu *vc;
   63.55 -
   63.56 -	/* Initialize monitor page table */
   63.57 -	for_each_vcpu(d, vc)
   63.58 -	    vc->arch.monitor_table = mk_pagetable(0);
   63.59 -
   63.60 -        /* 
   63.61 -         * Required to do this once per domain
   63.62 -         * TODO: add a seperate function to do these.
   63.63 -         */
   63.64 -        memset(&d->shared_info->evtchn_mask[0], 0xff, 
   63.65 -               sizeof(d->shared_info->evtchn_mask));       
   63.66 -
   63.67 -        /* 
   63.68 -         * Put the domain in shadow mode even though we're going to be using
   63.69 -         * the shared 1:1 page table initially. It shouldn't hurt 
   63.70 -         */
   63.71 -        shadow_mode_enable(d, 
   63.72 -                SHM_enable|SHM_refcounts|
   63.73 -		SHM_translate|SHM_external|SHM_wr_pt_pte);
   63.74 -    }
   63.75 +    if ( v != d->vcpu[0] )
   63.76 +        return;
   63.77 +
   63.78 +    /* Initialize monitor page table */
   63.79 +    for_each_vcpu( d, vc )
   63.80 +        vc->arch.monitor_table = pagetable_null();
   63.81 +
   63.82 +    /* 
   63.83 +     * Required to do this once per domain
   63.84 +     * TODO: add a separate function to do these.
   63.85 +     */
   63.86 +    memset(&d->shared_info->evtchn_mask[0], 0xff, 
   63.87 +           sizeof(d->shared_info->evtchn_mask));       
   63.88 +
   63.89 +    /* 
   63.90 +     * Put the domain in shadow mode even though we're going to be using
   63.91 +     * the shared 1:1 page table initially. It shouldn't hurt 
   63.92 +     */
   63.93 +    shadow_mode_enable(d,
   63.94 +                       SHM_enable|SHM_refcounts|
   63.95 +                       SHM_translate|SHM_external|SHM_wr_pt_pte);
   63.96  }
   63.97  
   63.98  
   63.99 @@ -870,7 +868,7 @@ static int svm_do_page_fault(unsigned lo
  63.100      /* Use 1:1 page table to identify MMIO address space */
  63.101      if (mmio_space(gpa))
  63.102      {
  63.103 -	/* No support for APIC */
  63.104 +        /* No support for APIC */
  63.105          if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
  63.106          { 
  63.107              int inst_len;
  63.108 @@ -1570,7 +1568,7 @@ static int svm_set_cr0(unsigned long val
  63.109          }
  63.110  
  63.111          /* Now arch.guest_table points to machine physical. */
  63.112 -        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
  63.113 +        v->arch.guest_table = pagetable_from_pfn(mfn);
  63.114          update_pagetables(v);
  63.115  
  63.116          HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
  63.117 @@ -1590,7 +1588,7 @@ static int svm_set_cr0(unsigned long val
  63.118          if ( v->arch.hvm_svm.cpu_cr3 ) {
  63.119              put_page(mfn_to_page(get_mfn_from_gpfn(
  63.120                        v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
  63.121 -            v->arch.guest_table = mk_pagetable(0);
  63.122 +            v->arch.guest_table = pagetable_null();
  63.123          }
  63.124  
  63.125      /*
  63.126 @@ -1599,7 +1597,7 @@ static int svm_set_cr0(unsigned long val
  63.127       * created.
  63.128       */
  63.129      if ((value & X86_CR0_PE) == 0) {
  63.130 -    	if (value & X86_CR0_PG) {
  63.131 +        if (value & X86_CR0_PG) {
  63.132              svm_inject_exception(v, TRAP_gp_fault, 1, 0);
  63.133              return 0;
  63.134          }
  63.135 @@ -1740,7 +1738,7 @@ static int mov_to_cr(int gpreg, int cr, 
  63.136              }
  63.137  
  63.138              old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
  63.139 -            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
  63.140 +            v->arch.guest_table = pagetable_from_pfn(mfn);
  63.141  
  63.142              if (old_base_mfn)
  63.143                  put_page(mfn_to_page(old_base_mfn));
  63.144 @@ -1797,7 +1795,7 @@ static int mov_to_cr(int gpreg, int cr, 
  63.145                   * Now arch.guest_table points to machine physical.
  63.146                   */
  63.147  
  63.148 -                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
  63.149 +                v->arch.guest_table = pagetable_from_pfn(mfn);
  63.150                  update_pagetables(v);
  63.151  
  63.152                  HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
    64.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 07 11:03:15 2006 +0100
    64.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 07 11:03:51 2006 +0100
    64.3 @@ -66,7 +66,7 @@ void vmx_final_setup_guest(struct vcpu *
    64.4  
    64.5          /* Initialize monitor page table */
    64.6          for_each_vcpu(d, vc)
    64.7 -            vc->arch.monitor_table = mk_pagetable(0);
    64.8 +            vc->arch.monitor_table = pagetable_null();
    64.9  
   64.10          /*
   64.11           * Required to do this once per domain
   64.12 @@ -1223,7 +1223,7 @@ vmx_world_restore(struct vcpu *v, struct
   64.13          if(!get_page(mfn_to_page(mfn), v->domain))
   64.14                  return 0;
   64.15          old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   64.16 -        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
   64.17 +        v->arch.guest_table = pagetable_from_pfn(mfn);
   64.18          if (old_base_mfn)
   64.19               put_page(mfn_to_page(old_base_mfn));
   64.20          /*
   64.21 @@ -1459,7 +1459,7 @@ static int vmx_set_cr0(unsigned long val
   64.22          /*
   64.23           * Now arch.guest_table points to machine physical.
   64.24           */
   64.25 -        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
   64.26 +        v->arch.guest_table = pagetable_from_pfn(mfn);
   64.27          update_pagetables(v);
   64.28  
   64.29          HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
   64.30 @@ -1477,7 +1477,7 @@ static int vmx_set_cr0(unsigned long val
   64.31          if ( v->arch.hvm_vmx.cpu_cr3 ) {
   64.32              put_page(mfn_to_page(get_mfn_from_gpfn(
   64.33                        v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
   64.34 -            v->arch.guest_table = mk_pagetable(0);
   64.35 +            v->arch.guest_table = pagetable_null();
   64.36          }
   64.37  
   64.38      /*
   64.39 @@ -1635,7 +1635,7 @@ static int mov_to_cr(int gp, int cr, str
   64.40                  domain_crash_synchronous(); /* need to take a clean path */
   64.41              }
   64.42              old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   64.43 -            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
   64.44 +            v->arch.guest_table = pagetable_from_pfn(mfn);
   64.45              if (old_base_mfn)
   64.46                  put_page(mfn_to_page(old_base_mfn));
   64.47              /*
   64.48 @@ -1690,7 +1690,7 @@ static int mov_to_cr(int gp, int cr, str
   64.49                   * Now arch.guest_table points to machine physical.
   64.50                   */
   64.51  
   64.52 -                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
   64.53 +                v->arch.guest_table = pagetable_from_pfn(mfn);
   64.54                  update_pagetables(v);
   64.55  
   64.56                  HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
   64.57 @@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint(
   64.58          __hvm_bug(regs);
   64.59  
   64.60      vector &= INTR_INFO_VECTOR_MASK;
   64.61 -    local_irq_disable();
   64.62      TRACE_VMEXIT(1,vector);
   64.63  
   64.64      switch(vector) {
   64.65 @@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc
   64.66      struct vcpu *v = current;
   64.67      int error;
   64.68  
   64.69 -    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
   64.70 -        __hvm_bug(&regs);
   64.71 +    error = __vmread(VM_EXIT_REASON, &exit_reason);
   64.72 +    BUG_ON(error);
   64.73  
   64.74      perfc_incra(vmexits, exit_reason);
   64.75  
   64.76 -    /* don't bother H/W interrutps */
   64.77 -    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
   64.78 -        exit_reason != EXIT_REASON_VMCALL &&
   64.79 -        exit_reason != EXIT_REASON_IO_INSTRUCTION) 
   64.80 +    if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) &&
   64.81 +         (exit_reason != EXIT_REASON_VMCALL) &&
   64.82 +         (exit_reason != EXIT_REASON_IO_INSTRUCTION) )
   64.83          HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
   64.84  
   64.85 -    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
   64.86 +    if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
   64.87 +        local_irq_enable();
   64.88 +
   64.89 +    if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
   64.90 +    {
   64.91          printk("Failed vm entry (reason 0x%x)\n", exit_reason);
   64.92          printk("*********** VMCS Area **************\n");
   64.93          vmcs_dump_vcpu();
   64.94          printk("**************************************\n");
   64.95          domain_crash_synchronous();
   64.96 -        return;
   64.97      }
   64.98  
   64.99      __vmread(GUEST_RIP, &eip);
  64.100      TRACE_VMEXIT(0,exit_reason);
  64.101  
  64.102 -    switch (exit_reason) {
  64.103 +    switch ( exit_reason )
  64.104 +    {
  64.105      case EXIT_REASON_EXCEPTION_NMI:
  64.106      {
  64.107          /*
    65.1 --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S	Wed Jun 07 11:03:15 2006 +0100
    65.2 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S	Wed Jun 07 11:03:51 2006 +0100
    65.3 @@ -55,29 +55,26 @@
    65.4   * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
    65.5   */
    65.6  
    65.7 -#define HVM_MONITOR_EFLAGS	0x202 /* IF on */
    65.8  #define NR_SKIPPED_REGS	6	/* See the above explanation */
    65.9 -#define HVM_SAVE_ALL_NOSEGREGS \
   65.10 -        pushl $HVM_MONITOR_EFLAGS; \
   65.11 -        popf; \
   65.12 -        subl $(NR_SKIPPED_REGS*4), %esp; \
   65.13 +#define HVM_SAVE_ALL_NOSEGREGS                                              \
   65.14 +        subl $(NR_SKIPPED_REGS*4), %esp;                                    \
   65.15          movl $0, 0xc(%esp);  /* XXX why do we need to force eflags==0 ?? */ \
   65.16 -        pushl %eax; \
   65.17 -        pushl %ebp; \
   65.18 -        pushl %edi; \
   65.19 -        pushl %esi; \
   65.20 -        pushl %edx; \
   65.21 -        pushl %ecx; \
   65.22 +        pushl %eax;                                                         \
   65.23 +        pushl %ebp;                                                         \
   65.24 +        pushl %edi;                                                         \
   65.25 +        pushl %esi;                                                         \
   65.26 +        pushl %edx;                                                         \
   65.27 +        pushl %ecx;                                                         \
   65.28          pushl %ebx;
   65.29  
   65.30 -#define HVM_RESTORE_ALL_NOSEGREGS   \
   65.31 -        popl %ebx;  \
   65.32 -        popl %ecx;  \
   65.33 -        popl %edx;  \
   65.34 -        popl %esi;  \
   65.35 -        popl %edi;  \
   65.36 -        popl %ebp;  \
   65.37 -        popl %eax;  \
   65.38 +#define HVM_RESTORE_ALL_NOSEGREGS               \
   65.39 +        popl %ebx;                              \
   65.40 +        popl %ecx;                              \
   65.41 +        popl %edx;                              \
   65.42 +        popl %esi;                              \
   65.43 +        popl %edi;                              \
   65.44 +        popl %ebp;                              \
   65.45 +        popl %eax;                              \
   65.46          addl $(NR_SKIPPED_REGS*4), %esp
   65.47  
   65.48          ALIGN
    66.1 --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S	Wed Jun 07 11:03:15 2006 +0100
    66.2 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S	Wed Jun 07 11:03:51 2006 +0100
    66.3 @@ -51,45 +51,42 @@
    66.4   * (2/1)  u32 entry_vector;
    66.5   * (1/1)  u32 error_code;
    66.6   */
    66.7 -#define HVM_MONITOR_RFLAGS	0x202 /* IF on */
    66.8  #define NR_SKIPPED_REGS	6	/* See the above explanation */
    66.9 -#define HVM_SAVE_ALL_NOSEGREGS \
   66.10 -        pushq $HVM_MONITOR_RFLAGS; \
   66.11 -        popfq; \
   66.12 -        subq $(NR_SKIPPED_REGS*8), %rsp; \
   66.13 -        pushq %rdi; \
   66.14 -        pushq %rsi; \
   66.15 -        pushq %rdx; \
   66.16 -        pushq %rcx; \
   66.17 -        pushq %rax; \
   66.18 -        pushq %r8;  \
   66.19 -        pushq %r9;  \
   66.20 -        pushq %r10; \
   66.21 -        pushq %r11; \
   66.22 -        pushq %rbx; \
   66.23 -        pushq %rbp; \
   66.24 -        pushq %r12; \
   66.25 -        pushq %r13; \
   66.26 -        pushq %r14; \
   66.27 -        pushq %r15; \
   66.28 +#define HVM_SAVE_ALL_NOSEGREGS                  \
   66.29 +        subq $(NR_SKIPPED_REGS*8), %rsp;        \
   66.30 +        pushq %rdi;                             \
   66.31 +        pushq %rsi;                             \
   66.32 +        pushq %rdx;                             \
   66.33 +        pushq %rcx;                             \
   66.34 +        pushq %rax;                             \
   66.35 +        pushq %r8;                              \
   66.36 +        pushq %r9;                              \
   66.37 +        pushq %r10;                             \
   66.38 +        pushq %r11;                             \
   66.39 +        pushq %rbx;                             \
   66.40 +        pushq %rbp;                             \
   66.41 +        pushq %r12;                             \
   66.42 +        pushq %r13;                             \
   66.43 +        pushq %r14;                             \
   66.44 +        pushq %r15;
   66.45  
   66.46 -#define HVM_RESTORE_ALL_NOSEGREGS \
   66.47 -        popq %r15; \
   66.48 -        popq %r14; \
   66.49 -        popq %r13; \
   66.50 -        popq %r12; \
   66.51 -        popq %rbp; \
   66.52 -        popq %rbx; \
   66.53 -        popq %r11; \
   66.54 -        popq %r10; \
   66.55 -        popq %r9;  \
   66.56 -        popq %r8;  \
   66.57 -        popq %rax; \
   66.58 -        popq %rcx; \
   66.59 -        popq %rdx; \
   66.60 -        popq %rsi; \
   66.61 -        popq %rdi; \
   66.62 -        addq $(NR_SKIPPED_REGS*8), %rsp; \
   66.63 +#define HVM_RESTORE_ALL_NOSEGREGS               \
   66.64 +        popq %r15;                              \
   66.65 +        popq %r14;                              \
   66.66 +        popq %r13;                              \
   66.67 +        popq %r12;                              \
   66.68 +        popq %rbp;                              \
   66.69 +        popq %rbx;                              \
   66.70 +        popq %r11;                              \
   66.71 +        popq %r10;                              \
   66.72 +        popq %r9;                               \
   66.73 +        popq %r8;                               \
   66.74 +        popq %rax;                              \
   66.75 +        popq %rcx;                              \
   66.76 +        popq %rdx;                              \
   66.77 +        popq %rsi;                              \
   66.78 +        popq %rdi;                              \
   66.79 +        addq $(NR_SKIPPED_REGS*8), %rsp;
   66.80  
   66.81  ENTRY(vmx_asm_vmexit_handler)
   66.82          /* selectors are restored/saved by VMX */
    67.1 --- a/xen/arch/x86/i8259.c	Wed Jun 07 11:03:15 2006 +0100
    67.2 +++ b/xen/arch/x86/i8259.c	Wed Jun 07 11:03:51 2006 +0100
    67.3 @@ -102,7 +102,7 @@ BUILD_SMP_INTERRUPT(thermal_interrupt,TH
    67.4   * moves to arch independent land
    67.5   */
    67.6  
    67.7 -spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
    67.8 +static DEFINE_SPINLOCK(i8259A_lock);
    67.9  
   67.10  static void disable_8259A_vector(unsigned int vector)
   67.11  {
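[Editor's note: the SPIN_LOCK_UNLOCKED static initializer is replaced by DEFINE_SPINLOCK() here and throughout the changeset (microcode.c, smp.c, time.c, domain_page.c, dom0_ops.c, page_alloc.c and perfc.c below); the macro declares and initializes in one step, so it keeps compiling if the lock structure ever grows extra (e.g. debug) fields. Note that i8259A_lock also becomes static in the process. A hypothetical, simplified sketch of the idiom:

    typedef struct { volatile int raw; } spinlock_t;   /* illustrative only */
    #define DEFINE_SPINLOCK(l) spinlock_t l = { 0 }

    /* old: static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
     * new: static DEFINE_SPINLOCK(flush_lock);                 */
]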
    68.1 --- a/xen/arch/x86/microcode.c	Wed Jun 07 11:03:15 2006 +0100
    68.2 +++ b/xen/arch/x86/microcode.c	Wed Jun 07 11:03:51 2006 +0100
    68.3 @@ -83,7 +83,7 @@
    68.4  #include <asm/processor.h>
    68.5  
    68.6  #define pr_debug(x...) ((void)0)
    68.7 -#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
    68.8 +#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
    68.9  #define down(_m) spin_lock(_m)
   68.10  #define up(_m) spin_unlock(_m)
   68.11  #define vmalloc(_s) xmalloc_bytes(_s)
    69.1 --- a/xen/arch/x86/mm.c	Wed Jun 07 11:03:15 2006 +0100
    69.2 +++ b/xen/arch/x86/mm.c	Wed Jun 07 11:03:51 2006 +0100
    69.3 @@ -89,6 +89,7 @@
    69.4  #include <xen/kernel.h>
    69.5  #include <xen/lib.h>
    69.6  #include <xen/mm.h>
    69.7 +#include <xen/domain.h>
    69.8  #include <xen/sched.h>
    69.9  #include <xen/errno.h>
   69.10  #include <xen/perfc.h>
   69.11 @@ -187,20 +188,16 @@ void arch_init_memory(void)
   69.12       * Any Xen-heap pages that we will allow to be mapped will have
   69.13       * their domain field set to dom_xen.
   69.14       */
   69.15 -    dom_xen = alloc_domain();
   69.16 -    spin_lock_init(&dom_xen->page_alloc_lock);
   69.17 -    atomic_set(&dom_xen->refcnt, 1);
   69.18 -    dom_xen->domain_id = DOMID_XEN;
   69.19 +    dom_xen = alloc_domain(DOMID_XEN);
   69.20 +    BUG_ON(dom_xen == NULL);
   69.21  
   69.22      /*
   69.23       * Initialise our DOMID_IO domain.
   69.24       * This domain owns I/O pages that are within the range of the page_info
   69.25       * array. Mappings occur at the priv of the caller.
   69.26       */
   69.27 -    dom_io = alloc_domain();
   69.28 -    spin_lock_init(&dom_io->page_alloc_lock);
   69.29 -    atomic_set(&dom_io->refcnt, 1);
   69.30 -    dom_io->domain_id = DOMID_IO;
   69.31 +    dom_io = alloc_domain(DOMID_IO);
   69.32 +    BUG_ON(dom_io == NULL);
   69.33  
   69.34      /* First 1MB of RAM is historically marked as I/O. */
   69.35      for ( i = 0; i < 0x100; i++ )
   69.36 @@ -1000,6 +997,21 @@ static int alloc_l3_table(struct page_in
   69.37  
   69.38      ASSERT(!shadow_mode_refcounts(d));
   69.39  
   69.40 +#ifdef CONFIG_X86_PAE
   69.41 +    /*
   69.42 +     * PAE pgdirs above 4GB are unacceptable if the guest does not understand
   69.43 +     * the weird 'extended cr3' format for dealing with high-order address
   69.44 +     * bits. We cut some slack for control tools (before vcpu0 is initialised).
   69.45 +     */
   69.46 +    if ( (pfn >= 0x100000) &&
   69.47 +         unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
   69.48 +         d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
   69.49 +    {
   69.50 +        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
   69.51 +        return 0;
   69.52 +    }
   69.53 +#endif
   69.54 +
   69.55      pl3e = map_domain_page(pfn);
   69.56      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   69.57      {
   69.58 @@ -1717,7 +1729,7 @@ int new_guest_cr3(unsigned long mfn)
   69.59          {
   69.60              /* Switch to idle pagetable: this VCPU has no active p.t. now. */
   69.61              old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   69.62 -            v->arch.guest_table = mk_pagetable(0);
   69.63 +            v->arch.guest_table = pagetable_null();
   69.64              update_pagetables(v);
   69.65              write_cr3(__pa(idle_pg_table));
   69.66              if ( old_base_mfn != 0 )
   69.67 @@ -1739,7 +1751,7 @@ int new_guest_cr3(unsigned long mfn)
   69.68      invalidate_shadow_ldt(v);
   69.69  
   69.70      old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   69.71 -    v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
   69.72 +    v->arch.guest_table = pagetable_from_pfn(mfn);
   69.73      update_pagetables(v); /* update shadow_table and monitor_table */
   69.74  
   69.75      write_ptbase(v);
   69.76 @@ -2006,7 +2018,7 @@ int do_mmuext_op(
   69.77              {
   69.78                  unsigned long old_mfn =
   69.79                      pagetable_get_pfn(v->arch.guest_table_user);
   69.80 -                v->arch.guest_table_user = mk_pagetable(mfn << PAGE_SHIFT);
   69.81 +                v->arch.guest_table_user = pagetable_from_pfn(mfn);
   69.82                  if ( old_mfn != 0 )
   69.83                      put_page_and_type(mfn_to_page(old_mfn));
   69.84              }
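[Editor's note: two distinct changes in mm.c. First, the open-coded DOMID_XEN/DOMID_IO setup collapses into the new alloc_domain(domid) (defined in common/domain.c below), which performs the refcount, lock and ID initialization itself. Second, alloc_l3_table() now rejects PAE page directories at frame 0x100000 or above, i.e. at or above the 4GB boundary (0x100000 << PAGE_SHIFT == 2^32), unless the guest has opted into the extended-cr3 format; this ties in with the "PAE=yes[extended-cr3]" build string and the XENVER_get_features change below. A guest-side opt-in sketch, assuming the standard HYPERVISOR_vm_assist hypercall wrapper:

    /* Tell Xen we understand extended cr3; after this, PAE pgdirs
     * above 4GB pass the alloc_l3_table() check. */
    if ( HYPERVISOR_vm_assist(VMASST_CMD_enable,
                              VMASST_TYPE_pae_extended_cr3) != 0 )
        /* older hypervisor: keep the pgdir below 4GB */;
]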
    70.1 --- a/xen/arch/x86/setup.c	Wed Jun 07 11:03:15 2006 +0100
    70.2 +++ b/xen/arch/x86/setup.c	Wed Jun 07 11:03:51 2006 +0100
    70.3 @@ -85,8 +85,6 @@ extern void early_cpu_init(void);
    70.4  
    70.5  struct tss_struct init_tss[NR_CPUS];
    70.6  
    70.7 -struct vcpu *idle_vcpu[NR_CPUS];
    70.8 -
    70.9  extern unsigned long cpu0_stack[];
   70.10  
   70.11  struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
    71.1 --- a/xen/arch/x86/shadow.c	Wed Jun 07 11:03:15 2006 +0100
    71.2 +++ b/xen/arch/x86/shadow.c	Wed Jun 07 11:03:51 2006 +0100
    71.3 @@ -2472,7 +2472,7 @@ static void shadow_update_pagetables(str
    71.4      if ( !get_shadow_ref(smfn) )
    71.5          BUG();
    71.6      old_smfn = pagetable_get_pfn(v->arch.shadow_table);
    71.7 -    v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT);
    71.8 +    v->arch.shadow_table = pagetable_from_pfn(smfn);
    71.9      if ( old_smfn )
   71.10          put_shadow_ref(old_smfn);
   71.11  
   71.12 @@ -3481,15 +3481,16 @@ static void shadow_set_l2e_64(unsigned l
   71.13  
   71.14      __shadow_get_l3e(v, va, &sl3e);
   71.15      if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
   71.16 -         if (create_l2_shadow) {
   71.17 +        if (create_l2_shadow) {
   71.18              perfc_incrc(shadow_set_l2e_force_map);
   71.19              shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
   71.20              __shadow_get_l3e(v, va, &sl3e);
   71.21          } else {
   71.22              printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow);
   71.23          }
   71.24 -         shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
   71.25 -
   71.26 +
   71.27 +        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L4 )
   71.28 +            shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
   71.29      }
   71.30  
   71.31      if ( put_ref_check ) {
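[Editor's note: the recurring mk_pagetable(x << PAGE_SHIFT) to pagetable_from_pfn(x) conversions (here and in shadow32.c, shadow_public.c, smpboot.c, x86_32/mm.c and x86_64/mm.c) replace hand-shifted addresses with typed constructors. That removes the easy-to-forget widening cast the old shadow.c line needed ("(u64)smfn << PAGE_SHIFT") and gives a spellable null value. The second shadow.c hunk additionally restricts shadow_update_min_max() to 4-level guests, where sl4e is actually meaningful. Plausible shapes for the constructors; the real definitions live in the page-table headers, not in this diff:

    typedef struct { unsigned long pa; } pagetable_t;            /* hypothetical */
    #define pagetable_from_paddr(p)  ((pagetable_t){ (p) })
    #define pagetable_from_pfn(pfn)  pagetable_from_paddr((paddr_t)(pfn) << PAGE_SHIFT)
    #define pagetable_from_page(pg)  pagetable_from_pfn(page_to_mfn(pg))
    #define pagetable_null()         pagetable_from_paddr(0)
    #define pagetable_is_null(t)     ((t).pa == 0)
]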
    72.1 --- a/xen/arch/x86/shadow32.c	Wed Jun 07 11:03:15 2006 +0100
    72.2 +++ b/xen/arch/x86/shadow32.c	Wed Jun 07 11:03:51 2006 +0100
    72.3 @@ -583,7 +583,7 @@ static void free_shadow_pages(struct dom
    72.4          if ( pagetable_get_paddr(v->arch.shadow_table) )
    72.5          {
    72.6              put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
    72.7 -            v->arch.shadow_table = mk_pagetable(0);
    72.8 +            v->arch.shadow_table = pagetable_null();
    72.9  
   72.10              if ( shadow_mode_external(d) )
   72.11              {
   72.12 @@ -765,7 +765,7 @@ static void alloc_monitor_pagetable(stru
   72.13      mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
   72.14      mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
   72.15  
   72.16 -    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
   72.17 +    v->arch.monitor_table = pagetable_from_pfn(mmfn);
   72.18      v->arch.monitor_vtable = mpl2e;
   72.19  
   72.20      if ( v->vcpu_id == 0 )
   72.21 @@ -830,7 +830,7 @@ void free_monitor_pagetable(struct vcpu 
   72.22      unmap_domain_page_global(v->arch.monitor_vtable);
   72.23      free_domheap_page(mfn_to_page(mfn));
   72.24  
   72.25 -    v->arch.monitor_table = mk_pagetable(0);
   72.26 +    v->arch.monitor_table = pagetable_null();
   72.27      v->arch.monitor_vtable = 0;
   72.28  }
   72.29  
   72.30 @@ -992,7 +992,7 @@ alloc_p2m_table(struct domain *d)
   72.31  
   72.32          l1tab = map_domain_page(page_to_mfn(page));
   72.33          memset(l1tab, 0, PAGE_SIZE);
   72.34 -        d->arch.phys_table = mk_pagetable(page_to_maddr(page));
   72.35 +        d->arch.phys_table = pagetable_from_page(page);
   72.36      }
   72.37  
   72.38      list_ent = d->page_list.next;
   72.39 @@ -1126,7 +1126,7 @@ int shadow_direct_map_init(struct domain
   72.40      memset(root, 0, PAGE_SIZE);
   72.41      unmap_domain_page(root);
   72.42  
   72.43 -    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
   72.44 +    d->arch.phys_table = pagetable_from_page(page);
   72.45  
   72.46      return 1;
   72.47  }
   72.48 @@ -1156,7 +1156,7 @@ void shadow_direct_map_clean(struct doma
   72.49  
   72.50      unmap_domain_page(l2e);
   72.51  
   72.52 -    d->arch.phys_table = mk_pagetable(0);
   72.53 +    d->arch.phys_table = pagetable_null();
   72.54  }
   72.55  
   72.56  int __shadow_mode_enable(struct domain *d, unsigned int mode)
   72.57 @@ -3231,7 +3231,7 @@ void __update_pagetables(struct vcpu *v)
   72.58      if ( !get_shadow_ref(smfn) )
   72.59          BUG();
   72.60      old_smfn = pagetable_get_pfn(v->arch.shadow_table);
   72.61 -    v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
   72.62 +    v->arch.shadow_table = pagetable_from_pfn(smfn);
   72.63      if ( old_smfn )
   72.64          put_shadow_ref(old_smfn);
   72.65  
    73.1 --- a/xen/arch/x86/shadow_public.c	Wed Jun 07 11:03:15 2006 +0100
    73.2 +++ b/xen/arch/x86/shadow_public.c	Wed Jun 07 11:03:51 2006 +0100
    73.3 @@ -50,7 +50,7 @@ int shadow_direct_map_init(struct domain
    73.4      memset(root, 0, PAGE_SIZE);
    73.5      root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
    73.6  
    73.7 -    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
    73.8 +    d->arch.phys_table = pagetable_from_page(page);
    73.9  
   73.10      unmap_domain_page(root);
   73.11      return 1;
   73.12 @@ -92,7 +92,7 @@ void shadow_direct_map_clean(struct doma
   73.13  
   73.14      unmap_domain_page(l3e);
   73.15  
   73.16 -    d->arch.phys_table = mk_pagetable(0);
   73.17 +    d->arch.phys_table = pagetable_null();
   73.18  }
   73.19  
   73.20  /****************************************************************************/
   73.21 @@ -338,7 +338,7 @@ static void alloc_monitor_pagetable(stru
   73.22  
   73.23      /* map the phys_to_machine map into the per domain Read-Only MPT space */
   73.24  
   73.25 -    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
   73.26 +    v->arch.monitor_table = pagetable_from_pfn(mmfn);
   73.27      v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
   73.28      mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
   73.29  
   73.30 @@ -380,7 +380,7 @@ void free_monitor_pagetable(struct vcpu 
   73.31      unmap_domain_page_global(v->arch.monitor_vtable);
   73.32      free_domheap_page(mfn_to_page(mfn));
   73.33  
   73.34 -    v->arch.monitor_table = mk_pagetable(0);
   73.35 +    v->arch.monitor_table = pagetable_null();
   73.36      v->arch.monitor_vtable = 0;
   73.37  }
   73.38  #elif CONFIG_PAGING_LEVELS == 3
   73.39 @@ -431,7 +431,7 @@ static void alloc_monitor_pagetable(stru
   73.40      for ( i = 0; i < (MACHPHYS_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
   73.41          mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = l2e_empty();
   73.42  
   73.43 -    v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */
   73.44 +    v->arch.monitor_table = pagetable_from_pfn(m3mfn);
   73.45      v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
   73.46  
   73.47      if ( v->vcpu_id == 0 )
   73.48 @@ -492,7 +492,7 @@ void free_monitor_pagetable(struct vcpu 
   73.49      unmap_domain_page_global(v->arch.monitor_vtable);
   73.50      free_domheap_page(mfn_to_page(m3mfn));
   73.51  
   73.52 -    v->arch.monitor_table = mk_pagetable(0);
   73.53 +    v->arch.monitor_table = pagetable_null();
   73.54      v->arch.monitor_vtable = 0;
   73.55  }
   73.56  #endif
   73.57 @@ -924,7 +924,7 @@ void free_shadow_pages(struct domain *d)
   73.58          if ( pagetable_get_paddr(v->arch.shadow_table) )
   73.59          {
   73.60              put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
   73.61 -            v->arch.shadow_table = mk_pagetable(0);
   73.62 +            v->arch.shadow_table = pagetable_null();
   73.63  
   73.64              if ( shadow_mode_external(d) )
   73.65              {
    74.1 --- a/xen/arch/x86/smp.c	Wed Jun 07 11:03:15 2006 +0100
    74.2 +++ b/xen/arch/x86/smp.c	Wed Jun 07 11:03:51 2006 +0100
    74.3 @@ -161,7 +161,7 @@ void send_IPI_mask_phys(cpumask_t mask, 
    74.4      local_irq_restore(flags);
    74.5  }
    74.6  
    74.7 -static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
    74.8 +static DEFINE_SPINLOCK(flush_lock);
    74.9  static cpumask_t flush_cpumask;
   74.10  static unsigned long flush_va;
   74.11  
    75.1 --- a/xen/arch/x86/smpboot.c	Wed Jun 07 11:03:15 2006 +0100
    75.2 +++ b/xen/arch/x86/smpboot.c	Wed Jun 07 11:03:51 2006 +0100
    75.3 @@ -37,6 +37,7 @@
    75.4  #include <xen/init.h>
    75.5  #include <xen/kernel.h>
    75.6  #include <xen/mm.h>
    75.7 +#include <xen/domain.h>
    75.8  #include <xen/sched.h>
    75.9  #include <xen/irq.h>
   75.10  #include <xen/delay.h>
   75.11 @@ -886,28 +887,16 @@ static int __devinit do_boot_cpu(int api
   75.12  	int timeout;
   75.13  	unsigned long start_eip;
   75.14  	unsigned short nmi_high = 0, nmi_low = 0;
   75.15 -	struct domain *d;
   75.16  	struct vcpu *v;
   75.17 -	int vcpu_id;
   75.18  
   75.19  	++cpucount;
   75.20  
   75.21  	booting_cpu = cpu;
   75.22  
   75.23 -	if ((vcpu_id = cpu % MAX_VIRT_CPUS) == 0) {
   75.24 -		d = domain_create(IDLE_DOMAIN_ID, cpu);
   75.25 -		BUG_ON(d == NULL);
   75.26 -		v = d->vcpu[0];
   75.27 -	} else {
   75.28 -		d = idle_vcpu[cpu - vcpu_id]->domain;
   75.29 -		BUG_ON(d == NULL);
   75.30 -		v = alloc_vcpu(d, vcpu_id, cpu);
   75.31 -	}
   75.32 -
   75.33 -	idle_vcpu[cpu] = v;
   75.34 +	v = alloc_idle_vcpu(cpu);
   75.35  	BUG_ON(v == NULL);
   75.36  
   75.37 -	v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
   75.38 +	v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table));
   75.39  
   75.40  	/* start_eip had better be page-aligned! */
   75.41  	start_eip = setup_trampoline();
    76.1 --- a/xen/arch/x86/time.c	Wed Jun 07 11:03:15 2006 +0100
    76.2 +++ b/xen/arch/x86/time.c	Wed Jun 07 11:03:51 2006 +0100
    76.3 @@ -40,10 +40,10 @@ boolean_param("hpet_force", opt_hpet_for
    76.4  
    76.5  unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
    76.6  unsigned long hpet_address;
    76.7 -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
    76.8 +DEFINE_SPINLOCK(rtc_lock);
    76.9  unsigned long volatile jiffies;
   76.10  static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
   76.11 -static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
   76.12 +static DEFINE_SPINLOCK(wc_lock);
   76.13  
   76.14  struct time_scale {
   76.15      int shift;
   76.16 @@ -67,7 +67,7 @@ static struct cpu_time cpu_time[NR_CPUS]
   76.17  static s_time_t stime_platform_stamp;
   76.18  static u64 platform_timer_stamp;
   76.19  static struct time_scale platform_timer_scale;
   76.20 -static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
   76.21 +static DEFINE_SPINLOCK(platform_timer_lock);
   76.22  static u64 (*read_platform_count)(void);
   76.23  
   76.24  /*
    77.1 --- a/xen/arch/x86/traps.c	Wed Jun 07 11:03:15 2006 +0100
    77.2 +++ b/xen/arch/x86/traps.c	Wed Jun 07 11:03:51 2006 +0100
    77.3 @@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
    77.4                      PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
    77.5                  break;
    77.6              }
    77.7 -            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
    77.8 +            regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
    77.9              break;
   77.10  
   77.11          case 0x6e: /* OUTSB */
   77.12 @@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
   77.13                  outl_user((u32)data, (u16)regs->edx, v, regs);
   77.14                  break;
   77.15              }
   77.16 -            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
   77.17 +            regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
   77.18              break;
   77.19          }
   77.20  
   77.21 @@ -1034,8 +1034,8 @@ static int emulate_privileged_op(struct 
   77.22              break;
   77.23              
   77.24          case 3: /* Read CR3 */
   77.25 -            *reg = pfn_to_paddr(mfn_to_gmfn(v->domain,
   77.26 -                                    pagetable_get_pfn(v->arch.guest_table)));
   77.27 +            *reg = xen_pfn_to_cr3(mfn_to_gmfn(
   77.28 +                v->domain, pagetable_get_pfn(v->arch.guest_table)));
   77.29              break;
   77.30  
   77.31          case 4: /* Read CR4 */
   77.32 @@ -1085,7 +1085,7 @@ static int emulate_privileged_op(struct 
   77.33          case 3: /* Write CR3 */
   77.34              LOCK_BIGLOCK(v->domain);
   77.35              cleanup_writable_pagetable(v->domain);
   77.36 -            (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg)));
   77.37 +            (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
   77.38              UNLOCK_BIGLOCK(v->domain);
   77.39              break;
   77.40  
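[Editor's note: guest CR3 values are now marshalled through xen_pfn_to_cr3()/xen_cr3_to_pfn() rather than pfn_to_paddr()/paddr_to_pfn(). For a PAE guest whose page directory may live above 4GB, "pfn << 12" no longer fits the 32-bit cr3 slot, so the extended format rotates the frame number instead. My reading of the 32-bit public header, shown for illustration only (on 64-bit a plain shift still suffices):

    #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
    #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))

The EF_DF hunks in the same file move the (int) cast outside the conditional; see the note after the x86_emulate.c diff below for why the cast placement matters on 64-bit builds.]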
    78.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Wed Jun 07 11:03:15 2006 +0100
    78.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Wed Jun 07 11:03:51 2006 +0100
    78.3 @@ -64,11 +64,13 @@ void __dummy__(void)
    78.4             arch.guest_context.kernel_ss);
    78.5      OFFSET(VCPU_kernel_sp, struct vcpu,
    78.6             arch.guest_context.kernel_sp);
    78.7 +    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
    78.8      OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
    78.9      OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
   78.10      OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
   78.11      DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
   78.12      DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
   78.13 +    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
   78.14      BLANK();
   78.15  
   78.16      OFFSET(TSS_ss0, struct tss_struct, ss0);
    79.1 --- a/xen/arch/x86/x86_32/domain_page.c	Wed Jun 07 11:03:15 2006 +0100
    79.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Wed Jun 07 11:03:51 2006 +0100
    79.3 @@ -183,7 +183,7 @@ void mapcache_init(struct domain *d)
    79.4  static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
    79.5  static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
    79.6  static unsigned int inuse_cursor;
    79.7 -static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED;
    79.8 +static DEFINE_SPINLOCK(globalmap_lock);
    79.9  
   79.10  void *map_domain_page_global(unsigned long pfn)
   79.11  {
    80.1 --- a/xen/arch/x86/x86_32/entry.S	Wed Jun 07 11:03:15 2006 +0100
    80.2 +++ b/xen/arch/x86/x86_32/entry.S	Wed Jun 07 11:03:51 2006 +0100
    80.3 @@ -130,7 +130,10 @@ failsafe_callback:
    80.4          movl  VCPU_failsafe_sel(%ebx),%eax
    80.5          movw  %ax,TRAPBOUNCE_cs(%edx)
    80.6          movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
    80.7 -        call  create_bounce_frame
    80.8 +        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx)
    80.9 +        jnc   1f
   80.10 +        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
   80.11 +1:      call  create_bounce_frame
   80.12          xorl  %eax,%eax
   80.13          movl  %eax,UREGS_ds(%esp)
   80.14          movl  %eax,UREGS_es(%esp)
    81.1 --- a/xen/arch/x86/x86_32/mm.c	Wed Jun 07 11:03:15 2006 +0100
    81.2 +++ b/xen/arch/x86/x86_32/mm.c	Wed Jun 07 11:03:51 2006 +0100
    81.3 @@ -75,7 +75,8 @@ void __init paging_init(void)
    81.4      printk("PAE disabled.\n");
    81.5  #endif
    81.6  
    81.7 -    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
    81.8 +    idle_vcpu[0]->arch.monitor_table =
    81.9 +        pagetable_from_paddr(__pa(idle_pg_table));
   81.10  
   81.11      if ( cpu_has_pge )
   81.12      {
    82.1 --- a/xen/arch/x86/x86_32/traps.c	Wed Jun 07 11:03:15 2006 +0100
    82.2 +++ b/xen/arch/x86/x86_32/traps.c	Wed Jun 07 11:03:51 2006 +0100
    82.3 @@ -346,6 +346,12 @@ static long register_guest_callback(stru
    82.4      case CALLBACKTYPE_failsafe:
    82.5          v->arch.guest_context.failsafe_callback_cs  = reg->address.cs;
    82.6          v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
    82.7 +        if ( reg->flags & CALLBACKF_mask_events )
    82.8 +            set_bit(_VGCF_failsafe_disables_events,
    82.9 +                    &v->arch.guest_context.flags);
   82.10 +        else
   82.11 +            clear_bit(_VGCF_failsafe_disables_events,
   82.12 +                      &v->arch.guest_context.flags);
   82.13          break;
   82.14  
   82.15  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
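[Editor's note: CALLBACKF_mask_events support. Registering a failsafe (and, on x86-64, syscall) callback can now request that the guest enter the handler with event delivery masked. The C side records the request in _VGCF_failsafe_disables_events; the entry.S hunks test that bit (bt/jnc) and OR TBF_INTERRUPT into the trap-bounce flags, so create_bounce_frame masks events on entry, much as an interrupt gate clears IF. Guest-side registration, sketched against the public callback interface (failsafe_handler and the segment choice are placeholders):

    struct callback_register cb = {
        .type    = CALLBACKTYPE_failsafe,
        .flags   = CALLBACKF_mask_events,      /* run with events masked */
        .address = { .cs  = FLAT_KERNEL_CS,
                     .eip = (unsigned long)failsafe_handler },
    };
    if ( HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 )
        /* older hypervisor: handler must tolerate unmasked events */;
]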
    83.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Wed Jun 07 11:03:15 2006 +0100
    83.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Wed Jun 07 11:03:51 2006 +0100
    83.3 @@ -64,11 +64,14 @@ void __dummy__(void)
    83.4             arch.guest_context.syscall_callback_eip);
    83.5      OFFSET(VCPU_kernel_sp, struct vcpu,
    83.6             arch.guest_context.kernel_sp);
    83.7 +    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
    83.8      OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
    83.9      OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
   83.10      OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
   83.11      DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
   83.12      DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
   83.13 +    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
   83.14 +    DEFINE(_VGCF_syscall_disables_events,  _VGCF_syscall_disables_events);
   83.15      BLANK();
   83.16  
   83.17      OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
    84.1 --- a/xen/arch/x86/x86_64/entry.S	Wed Jun 07 11:03:15 2006 +0100
    84.2 +++ b/xen/arch/x86/x86_64/entry.S	Wed Jun 07 11:03:51 2006 +0100
    84.3 @@ -30,7 +30,10 @@ switch_to_kernel:
    84.4          movq  VCPU_syscall_addr(%rbx),%rax
    84.5          movq  %rax,TRAPBOUNCE_eip(%rdx)
    84.6          movw  $0,TRAPBOUNCE_flags(%rdx)
    84.7 -        call  create_bounce_frame
    84.8 +        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
    84.9 +        jnc   1f
   84.10 +        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
   84.11 +1:      call  create_bounce_frame
   84.12          jmp   test_all_events
   84.13  
   84.14  /* %rbx: struct vcpu, interrupts disabled */
   84.15 @@ -77,7 +80,10 @@ failsafe_callback:
   84.16          movq  VCPU_failsafe_addr(%rbx),%rax
   84.17          movq  %rax,TRAPBOUNCE_eip(%rdx)
   84.18          movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
   84.19 -        call  create_bounce_frame
   84.20 +        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
   84.21 +        jnc   1f
   84.22 +        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
   84.23 +1:      call  create_bounce_frame
   84.24          jmp   test_all_events
   84.25  .previous
   84.26  .section __pre_ex_table,"a"
    85.1 --- a/xen/arch/x86/x86_64/mm.c	Wed Jun 07 11:03:15 2006 +0100
    85.2 +++ b/xen/arch/x86/x86_64/mm.c	Wed Jun 07 11:03:51 2006 +0100
    85.3 @@ -81,7 +81,8 @@ void __init paging_init(void)
    85.4      l2_pgentry_t *l2_ro_mpt;
    85.5      struct page_info *pg;
    85.6  
    85.7 -    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
    85.8 +    idle_vcpu[0]->arch.monitor_table =
    85.9 +        pagetable_from_paddr(__pa(idle_pg_table));
   85.10  
   85.11      /* Create user-accessible L2 directory to map the MPT for guests. */
   85.12      l3_ro_mpt = alloc_xenheap_page();
    86.1 --- a/xen/arch/x86/x86_64/traps.c	Wed Jun 07 11:03:15 2006 +0100
    86.2 +++ b/xen/arch/x86/x86_64/traps.c	Wed Jun 07 11:03:51 2006 +0100
    86.3 @@ -195,7 +195,7 @@ unsigned long do_iret(void)
    86.4      /* Returning to user mode? */
    86.5      if ( (iret_saved.cs & 3) == 3 )
    86.6      {
    86.7 -        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
    86.8 +        if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) )
    86.9          {
   86.10              DPRINTK("Guest switching to user mode with no user page tables\n");
   86.11              domain_crash_synchronous();
   86.12 @@ -334,10 +334,22 @@ static long register_guest_callback(stru
   86.13  
   86.14      case CALLBACKTYPE_failsafe:
   86.15          v->arch.guest_context.failsafe_callback_eip = reg->address;
   86.16 +        if ( reg->flags & CALLBACKF_mask_events )
   86.17 +            set_bit(_VGCF_failsafe_disables_events,
   86.18 +                    &v->arch.guest_context.flags);
   86.19 +        else
   86.20 +            clear_bit(_VGCF_failsafe_disables_events,
   86.21 +                      &v->arch.guest_context.flags);
   86.22          break;
   86.23  
   86.24      case CALLBACKTYPE_syscall:
   86.25          v->arch.guest_context.syscall_callback_eip  = reg->address;
   86.26 +        if ( reg->flags & CALLBACKF_mask_events )
   86.27 +            set_bit(_VGCF_syscall_disables_events,
   86.28 +                    &v->arch.guest_context.flags);
   86.29 +        else
   86.30 +            clear_bit(_VGCF_syscall_disables_events,
   86.31 +                      &v->arch.guest_context.flags);
   86.32          break;
   86.33  
   86.34      case CALLBACKTYPE_nmi:
    87.1 --- a/xen/arch/x86/x86_emulate.c	Wed Jun 07 11:03:15 2006 +0100
    87.2 +++ b/xen/arch/x86/x86_emulate.c	Wed Jun 07 11:03:51 2006 +0100
    87.3 @@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = {
    87.4      ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    87.5      ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    87.6      /* 0x88 - 0x8F */
    87.7 -    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    87.8 -    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    87.9 +    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
   87.10 +    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
   87.11      0, 0, 0, DstMem|SrcNone|ModRM|Mov,
   87.12      /* 0x90 - 0x9F */
   87.13      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   87.14 @@ -380,11 +380,12 @@ do{ __asm__ __volatile__ (              
   87.15        ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
   87.16  #define register_address_increment(reg, inc)                            \
   87.17  do {                                                                    \
   87.18 +    int _inc = (inc); /* signed type ensures sign extension to long */  \
   87.19      if ( ad_bytes == sizeof(unsigned long) )                            \
   87.20 -        (reg) += (inc);                                                 \
   87.21 +        (reg) += _inc;                                                  \
   87.22      else                                                                \
   87.23          (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
   87.24 -                (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1));     \
   87.25 +                (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1));      \
   87.26  } while (0)
   87.27  
   87.28  void *
   87.29 @@ -858,7 +859,7 @@ x86_emulate_memop(
   87.30                                           &dst.val, 8, ctxt)) != 0 )
   87.31                      goto done;
   87.32              }
   87.33 -            register_address_increment(_regs.esp, -(int)dst.bytes);
   87.34 +            register_address_increment(_regs.esp, -dst.bytes);
   87.35              if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
   87.36                                        dst.val, dst.bytes, ctxt)) != 0 )
   87.37                  goto done;
   87.38 @@ -942,9 +943,9 @@ x86_emulate_memop(
   87.39                  goto done;
   87.40          }
   87.41          register_address_increment(
   87.42 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
   87.43 +            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   87.44          register_address_increment(
   87.45 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
   87.46 +            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   87.47          break;
   87.48      case 0xa6 ... 0xa7: /* cmps */
   87.49          DPRINTF("Urk! I don't handle CMPS.\n");
   87.50 @@ -955,7 +956,7 @@ x86_emulate_memop(
   87.51          dst.ptr   = (unsigned long *)cr2;
   87.52          dst.val   = _regs.eax;
   87.53          register_address_increment(
   87.54 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
   87.55 +            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   87.56          break;
   87.57      case 0xac ... 0xad: /* lods */
   87.58          dst.type  = OP_REG;
   87.59 @@ -964,7 +965,7 @@ x86_emulate_memop(
   87.60          if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
   87.61              goto done;
   87.62          register_address_increment(
   87.63 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
   87.64 +            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   87.65          break;
   87.66      case 0xae ... 0xaf: /* scas */
   87.67          DPRINTF("Urk! I don't handle SCAS.\n");
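[Editor's note: two separate fixes in x86_emulate.c. Opcodes 0x88-0x8B gain the Mov attribute; as I read the emulator's write-back logic, that forces the destination write even when the new value equals the old one, which matters when the write itself, not the value change, is what the caller needs to observe. And register_address_increment() now latches its argument into a signed local (which also avoids double evaluation of "inc"), so callers pass -dst.bytes directly and every old "-(int)" cast disappears. The pitfall being fixed, as a self-contained demonstration with hypothetical values:

    unsigned long reg = 0x1000;      /* 64-bit register image   */
    unsigned int bytes = 4;
    int df = 1;                      /* direction flag set      */

    reg += (df ? -bytes : bytes);        /* ternary is unsigned: adds
                                          * 0xFFFFFFFC, i.e. ~4GB, on LP64 */
    reg += (int)(df ? -bytes : bytes);   /* cast the whole expression:
                                          * sign-extends to -4, correct    */
]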
    88.1 --- a/xen/common/dom0_ops.c	Wed Jun 07 11:03:15 2006 +0100
    88.2 +++ b/xen/common/dom0_ops.c	Wed Jun 07 11:03:51 2006 +0100
    88.3 @@ -95,7 +95,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
    88.4      long ret = 0;
    88.5      struct dom0_op curop, *op = &curop;
    88.6      void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
    88.7 -    static spinlock_t dom0_lock = SPIN_LOCK_UNLOCKED;
    88.8 +    static DEFINE_SPINLOCK(dom0_lock);
    88.9  
   88.10      if ( !IS_PRIV(current->domain) )
   88.11          return -EPERM;
    89.1 --- a/xen/common/domain.c	Wed Jun 07 11:03:15 2006 +0100
    89.2 +++ b/xen/common/domain.c	Wed Jun 07 11:03:51 2006 +0100
    89.3 @@ -32,23 +32,112 @@ struct domain *domain_list;
    89.4  
    89.5  struct domain *dom0;
    89.6  
    89.7 -struct domain *domain_create(domid_t dom_id, unsigned int cpu)
    89.8 +struct vcpu *idle_vcpu[NR_CPUS];
    89.9 +
   89.10 +struct domain *alloc_domain(domid_t domid)
   89.11 +{
   89.12 +    struct domain *d;
   89.13 +
   89.14 +    if ( (d = xmalloc(struct domain)) == NULL )
   89.15 +        return NULL;
   89.16 +
   89.17 +    memset(d, 0, sizeof(*d));
   89.18 +    d->domain_id = domid;
   89.19 +    atomic_set(&d->refcnt, 1);
   89.20 +    spin_lock_init(&d->big_lock);
   89.21 +    spin_lock_init(&d->page_alloc_lock);
   89.22 +    INIT_LIST_HEAD(&d->page_list);
   89.23 +    INIT_LIST_HEAD(&d->xenpage_list);
   89.24 +
   89.25 +    return d;
   89.26 +}
   89.27 +
   89.28 +
   89.29 +void free_domain(struct domain *d)
   89.30 +{
   89.31 +    struct vcpu *v;
   89.32 +    int i;
   89.33 +
   89.34 +    sched_destroy_domain(d);
   89.35 +
   89.36 +    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
   89.37 +        if ( (v = d->vcpu[i]) != NULL )
   89.38 +            free_vcpu_struct(v);
   89.39 +
   89.40 +    xfree(d);
   89.41 +}
   89.42 +
   89.43 +
   89.44 +struct vcpu *alloc_vcpu(
   89.45 +    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
   89.46 +{
   89.47 +    struct vcpu *v;
   89.48 +
   89.49 +    BUG_ON(d->vcpu[vcpu_id] != NULL);
   89.50 +
   89.51 +    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
   89.52 +        return NULL;
   89.53 +
   89.54 +    v->domain = d;
   89.55 +    v->vcpu_id = vcpu_id;
   89.56 +    v->processor = cpu_id;
   89.57 +    atomic_set(&v->pausecnt, 0);
   89.58 +    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
   89.59 +
   89.60 +    v->cpu_affinity = is_idle_domain(d) ?
   89.61 +        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
   89.62 +
   89.63 +    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
   89.64 +    v->runstate.state_entry_time = NOW();
   89.65 +
   89.66 +    if ( (vcpu_id != 0) && !is_idle_domain(d) )
   89.67 +        set_bit(_VCPUF_down, &v->vcpu_flags);
   89.68 +
   89.69 +    if ( sched_init_vcpu(v) < 0 )
   89.70 +    {
   89.71 +        free_vcpu_struct(v);
   89.72 +        return NULL;
   89.73 +    }
   89.74 +
   89.75 +    d->vcpu[vcpu_id] = v;
   89.76 +    if ( vcpu_id != 0 )
   89.77 +        d->vcpu[v->vcpu_id-1]->next_in_list = v;
   89.78 +
   89.79 +    return v;
   89.80 +}
   89.81 +
   89.82 +struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
   89.83 +{
   89.84 +    struct domain *d;
   89.85 +    struct vcpu *v;
   89.86 +    unsigned int vcpu_id;
   89.87 +
   89.88 +    if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0)
   89.89 +    {
   89.90 +        d = domain_create(IDLE_DOMAIN_ID, cpu_id);
   89.91 +        BUG_ON(d == NULL);
   89.92 +        v = d->vcpu[0];
   89.93 +    }
   89.94 +    else
   89.95 +    {
   89.96 +        d = idle_vcpu[cpu_id - vcpu_id]->domain;
   89.97 +        BUG_ON(d == NULL);
   89.98 +        v = alloc_vcpu(d, vcpu_id, cpu_id);
   89.99 +    }
  89.100 +
  89.101 +    idle_vcpu[cpu_id] = v;
  89.102 +
  89.103 +    return v;
  89.104 +}
  89.105 +
  89.106 +struct domain *domain_create(domid_t domid, unsigned int cpu)
  89.107  {
  89.108      struct domain *d, **pd;
  89.109      struct vcpu *v;
  89.110  
  89.111 -    if ( (d = alloc_domain()) == NULL )
  89.112 +    if ( (d = alloc_domain(domid)) == NULL )
  89.113          return NULL;
  89.114  
  89.115 -    d->domain_id = dom_id;
  89.116 -
  89.117 -    atomic_set(&d->refcnt, 1);
  89.118 -
  89.119 -    spin_lock_init(&d->big_lock);
  89.120 -    spin_lock_init(&d->page_alloc_lock);
  89.121 -    INIT_LIST_HEAD(&d->page_list);
  89.122 -    INIT_LIST_HEAD(&d->xenpage_list);
  89.123 -
  89.124      rangeset_domain_initialise(d);
  89.125  
  89.126      if ( !is_idle_domain(d) )
  89.127 @@ -74,14 +163,14 @@ struct domain *domain_create(domid_t dom
  89.128      if ( !is_idle_domain(d) )
  89.129      {
  89.130          write_lock(&domlist_lock);
  89.131 -        pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
  89.132 +        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
  89.133          for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
  89.134              if ( (*pd)->domain_id > d->domain_id )
  89.135                  break;
  89.136          d->next_in_list = *pd;
  89.137          *pd = d;
  89.138 -        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(dom_id)];
  89.139 -        domain_hash[DOMAIN_HASH(dom_id)] = d;
  89.140 +        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
  89.141 +        domain_hash[DOMAIN_HASH(domid)] = d;
  89.142          write_unlock(&domlist_lock);
  89.143      }
  89.144  
  89.145 @@ -126,19 +215,16 @@ struct domain *find_domain_by_id(domid_t
  89.146  
  89.147  void domain_kill(struct domain *d)
  89.148  {
  89.149 -    struct vcpu *v;
  89.150 +    domain_pause(d);
  89.151 +
  89.152 +    if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) )
  89.153 +        return;
  89.154  
  89.155 -    domain_pause(d);
  89.156 -    if ( !test_and_set_bit(_DOMF_dying, &d->domain_flags) )
  89.157 -    {
  89.158 -        for_each_vcpu(d, v)
  89.159 -            sched_rem_domain(v);
  89.160 -        gnttab_release_mappings(d);
  89.161 -        domain_relinquish_resources(d);
  89.162 -        put_domain(d);
  89.163 +    gnttab_release_mappings(d);
  89.164 +    domain_relinquish_resources(d);
  89.165 +    put_domain(d);
  89.166  
  89.167 -        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
  89.168 -    }
  89.169 +    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
  89.170  }
  89.171  
  89.172  
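[Editor's note: common/domain.c becomes the single home for the allocation path. alloc_domain() moves in from schedule.c and now takes the domid, doing the refcount/lock/list setup that arch_init_memory() and domain_create() used to duplicate; alloc_vcpu() moves in as well, calling the new sched_init_vcpu() instead of the old alloc_task/add_task pair; and alloc_idle_vcpu() hoists the idle-domain bootstrapping, plus the idle_vcpu[] array itself, out of x86 smpboot.c. domain_kill() is also flattened into an early-return form, with per-vcpu scheduler teardown now reached via free_domain() -> sched_destroy_domain(). Bring-up usage, mirroring the smpboot.c hunk above:

    /* In do_boot_cpu(): one call replaces the old domain_create()/
     * alloc_vcpu() dance; the idle domain is created on first use. */
    v = alloc_idle_vcpu(cpu);
    BUG_ON(v == NULL);
    v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table));
]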
    90.1 --- a/xen/common/kernel.c	Wed Jun 07 11:03:15 2006 +0100
    90.2 +++ b/xen/common/kernel.c	Wed Jun 07 11:03:51 2006 +0100
    90.3 @@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H
    90.4      case XENVER_get_features:
    90.5      {
    90.6          xen_feature_info_t fi;
    90.7 +        struct domain *d = current->domain;
    90.8  
    90.9          if ( copy_from_guest(&fi, arg, 1) )
   90.10              return -EFAULT;
   90.11 @@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H
   90.12          switch ( fi.submap_idx )
   90.13          {
   90.14          case 0:
   90.15 -            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
   90.16 +            fi.submap = 0;
   90.17 +            if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
   90.18 +                fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
   90.19              if ( shadow_mode_translate(current->domain) )
   90.20                  fi.submap |= 
   90.21                      (1U << XENFEAT_writable_page_tables) |
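[Editor's note: XENFEAT_pae_pgdir_above_4gb is no longer advertised unconditionally; it is reported only once the guest has enabled the pae_extended_cr3 assist, matching the alloc_l3_table() check earlier in this changeset. A guest probe, sketched against the public xen_version interface:

    xen_feature_info_t fi = { .submap_idx = 0 };
    if ( (HYPERVISOR_xen_version(XENVER_get_features, &fi) == 0) &&
         (fi.submap & (1U << XENFEAT_pae_pgdir_above_4gb)) )
        /* safe to give Xen a PAE pgdir located above 4GB */;
]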
    91.1 --- a/xen/common/keyhandler.c	Wed Jun 07 11:03:15 2006 +0100
    91.2 +++ b/xen/common/keyhandler.c	Wed Jun 07 11:03:51 2006 +0100
    91.3 @@ -128,11 +128,12 @@ static void dump_domains(unsigned char k
    91.4                 d->domain_flags, atomic_read(&d->refcnt),
    91.5                 d->tot_pages, d->xenheap_pages, cpuset);
    91.6          printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
    91.7 -               "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
    91.8 +               "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
    91.9                 d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
   91.10                 d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
   91.11                 d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
   91.12 -               d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
   91.13 +               d->handle[12], d->handle[13], d->handle[14], d->handle[15],
   91.14 +               d->vm_assist);
   91.15  
   91.16          arch_dump_domain_info(d);
   91.17  
    92.1 --- a/xen/common/memory.c	Wed Jun 07 11:03:15 2006 +0100
    92.2 +++ b/xen/common/memory.c	Wed Jun 07 11:03:51 2006 +0100
    92.3 @@ -31,14 +31,15 @@
    92.4  static long
    92.5  increase_reservation(
    92.6      struct domain *d, 
    92.7 -    XEN_GUEST_HANDLE(ulong) extent_list,
    92.8 +    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    92.9      unsigned int   nr_extents,
   92.10      unsigned int   extent_order,
   92.11      unsigned int   flags,
   92.12      int           *preempted)
   92.13  {
   92.14      struct page_info *page;
   92.15 -    unsigned long     i, mfn;
   92.16 +    unsigned long i;
   92.17 +    xen_pfn_t mfn;
   92.18  
   92.19      if ( !guest_handle_is_null(extent_list) &&
   92.20           !guest_handle_okay(extent_list, nr_extents) )
   92.21 @@ -80,14 +81,16 @@ increase_reservation(
   92.22  static long
   92.23  populate_physmap(
   92.24      struct domain *d, 
   92.25 -    XEN_GUEST_HANDLE(ulong) extent_list,
   92.26 +    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
   92.27      unsigned int  nr_extents,
   92.28      unsigned int  extent_order,
   92.29      unsigned int  flags,
   92.30      int          *preempted)
   92.31  {
   92.32      struct page_info *page;
   92.33 -    unsigned long    i, j, gpfn, mfn;
   92.34 +    unsigned long i, j;
   92.35 +    xen_pfn_t gpfn;
   92.36 +    xen_pfn_t mfn;
   92.37  
   92.38      if ( !guest_handle_okay(extent_list, nr_extents) )
   92.39          return 0;
   92.40 @@ -177,13 +180,14 @@ guest_remove_page(
   92.41  static long
   92.42  decrease_reservation(
   92.43      struct domain *d,
   92.44 -    XEN_GUEST_HANDLE(ulong) extent_list,
   92.45 +    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
   92.46      unsigned int   nr_extents,
   92.47      unsigned int   extent_order,
   92.48      unsigned int   flags,
   92.49      int           *preempted)
   92.50  {
   92.51 -    unsigned long    i, j, gmfn;
   92.52 +    unsigned long i, j;
   92.53 +    xen_pfn_t gmfn;
   92.54  
   92.55      if ( !guest_handle_okay(extent_list, nr_extents) )
   92.56          return 0;
   92.57 @@ -214,7 +218,9 @@ translate_gpfn_list(
   92.58      XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
   92.59  {
   92.60      struct xen_translate_gpfn_list op;
   92.61 -    unsigned long i, gpfn, mfn;
   92.62 +    unsigned long i;
   92.63 +    xen_pfn_t gpfn;
   92.64 +    xen_pfn_t mfn;
   92.65      struct domain *d;
   92.66  
   92.67      if ( copy_from_guest(&op, uop, 1) )
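[Editor's note: the memory-op handlers retype their guest handles and frame variables from bare ulong to xen_pfn_t, the ABI's frame-number type. On x86 the width is unchanged; the gain is that "array of frame numbers" is now named once in the public interface, so an architecture where guest frame lists are not unsigned-long-shaped (the ia64 work this tree merges) can define the type to suit. Illustrative shape only; the public headers are not part of this hunk:

    typedef unsigned long xen_pfn_t;                  /* per-arch; x86 shown */
    struct xen_memory_reservation {
        XEN_GUEST_HANDLE(xen_pfn_t) extent_start;     /* was: ulong handle   */
        unsigned long nr_extents;
        unsigned int  extent_order;
        /* ... */
    };
]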
    93.1 --- a/xen/common/page_alloc.c	Wed Jun 07 11:03:15 2006 +0100
    93.2 +++ b/xen/common/page_alloc.c	Wed Jun 07 11:03:51 2006 +0100
    93.3 @@ -59,7 +59,7 @@ custom_param("lowmem_emergency_pool", pa
    93.4  #define round_pgdown(_p)  ((_p)&PAGE_MASK)
    93.5  #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    93.6  
    93.7 -static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
    93.8 +static DEFINE_SPINLOCK(page_scrub_lock);
    93.9  LIST_HEAD(page_scrub_list);
   93.10  
   93.11  /*********************
   93.12 @@ -250,7 +250,7 @@ static struct list_head heap[NR_ZONES][M
   93.13  
   93.14  static unsigned long avail[NR_ZONES];
   93.15  
   93.16 -static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;
   93.17 +static DEFINE_SPINLOCK(heap_lock);
   93.18  
   93.19  void end_boot_allocator(void)
   93.20  {
    94.1 --- a/xen/common/perfc.c	Wed Jun 07 11:03:15 2006 +0100
    94.2 +++ b/xen/common/perfc.c	Wed Jun 07 11:03:51 2006 +0100
    94.3 @@ -209,7 +209,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
    94.4  /* Dom0 control of perf counters */
    94.5  int perfc_control(dom0_perfccontrol_t *pc)
    94.6  {
    94.7 -    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
    94.8 +    static DEFINE_SPINLOCK(lock);
    94.9      u32 op = pc->op;
   94.10      int rc;
   94.11  
    95.1 --- a/xen/common/sched_bvt.c	Wed Jun 07 11:03:15 2006 +0100
    95.2 +++ b/xen/common/sched_bvt.c	Wed Jun 07 11:03:51 2006 +0100
    95.3 @@ -160,15 +160,14 @@ static inline u32 calc_evt(struct vcpu *
    95.4  }
    95.5  
    95.6  /**
    95.7 - * bvt_alloc_task - allocate BVT private structures for a task
    95.8 - * @p:              task to allocate private structures for
    95.9 - *
   95.10 + * bvt_init_vcpu - allocate BVT private structures for a VCPU.
   95.11   * Returns non-zero on failure.
   95.12   */
   95.13 -static int bvt_alloc_task(struct vcpu *v)
   95.14 +static int bvt_init_vcpu(struct vcpu *v)
   95.15  {
   95.16      struct domain *d = v->domain;
   95.17      struct bvt_dom_info *inf;
   95.18 +    struct bvt_vcpu_info *einf;
   95.19  
   95.20      if ( (d->sched_priv == NULL) )
   95.21      {
   95.22 @@ -199,15 +198,7 @@ static int bvt_alloc_task(struct vcpu *v
   95.23          init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
   95.24      }
   95.25  
   95.26 -    return 0;
   95.27 -}
   95.28 -
   95.29 -/*
   95.30 - * Add and remove a domain
   95.31 - */
   95.32 -static void bvt_add_task(struct vcpu *v) 
   95.33 -{
   95.34 -    struct bvt_vcpu_info *einf = EBVT_INFO(v);
   95.35 +    einf = EBVT_INFO(v);
   95.36  
   95.37      /* Allocate per-CPU context if this is the first domain to be added. */
   95.38      if ( CPU_INFO(v->processor) == NULL )
   95.39 @@ -223,13 +214,15 @@ static void bvt_add_task(struct vcpu *v)
   95.40          einf->avt = einf->evt = ~0U;
   95.41          BUG_ON(__task_on_runqueue(v));
   95.42          __add_to_runqueue_head(v);
   95.43 -    } 
   95.44 +    }
   95.45      else 
   95.46      {
   95.47          /* Set avt and evt to system virtual time. */
   95.48          einf->avt = CPU_SVT(v->processor);
   95.49          einf->evt = CPU_SVT(v->processor);
   95.50      }
   95.51 +
   95.52 +    return 0;
   95.53  }
   95.54  
   95.55  static void bvt_wake(struct vcpu *v)
   95.56 @@ -298,10 +291,9 @@ static int bvt_set_affinity(struct vcpu 
   95.57  
   95.58  
   95.59  /**
   95.60 - * bvt_free_task - free BVT private structures for a task
   95.61 - * @d:             task
   95.62 + * bvt_destroy_domain - free BVT private structures for a domain.
   95.63   */
   95.64 -static void bvt_free_task(struct domain *d)
   95.65 +static void bvt_destroy_domain(struct domain *d)
   95.66  {
   95.67      struct bvt_dom_info *inf = BVT_INFO(d);
   95.68  
   95.69 @@ -568,10 +560,10 @@ struct scheduler sched_bvt_def = {
   95.70      .name     = "Borrowed Virtual Time",
   95.71      .opt_name = "bvt",
   95.72      .sched_id = SCHED_BVT,
   95.73 -    
   95.74 -    .alloc_task     = bvt_alloc_task,
   95.75 -    .add_task       = bvt_add_task,
   95.76 -    .free_task      = bvt_free_task,
   95.77 +
   95.78 +    .init_vcpu      = bvt_init_vcpu,
   95.79 +    .destroy_domain = bvt_destroy_domain,
   95.80 +
   95.81      .do_schedule    = bvt_do_schedule,
   95.82      .control        = bvt_ctl,
   95.83      .adjdom         = bvt_adjdom,
    96.1 --- a/xen/common/sched_credit.c	Wed Jun 07 11:03:15 2006 +0100
    96.2 +++ b/xen/common/sched_credit.c	Wed Jun 07 11:03:51 2006 +0100
    96.3 @@ -75,14 +75,13 @@
    96.4      } while ( 0 );
    96.5  
    96.6  #define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
    96.7 -    _MACRO(vcpu_alloc)                      \
    96.8 -    _MACRO(vcpu_add)                        \
    96.9 +    _MACRO(vcpu_init)                       \
   96.10      _MACRO(vcpu_sleep)                      \
   96.11      _MACRO(vcpu_wake_running)               \
   96.12      _MACRO(vcpu_wake_onrunq)                \
   96.13      _MACRO(vcpu_wake_runnable)              \
   96.14      _MACRO(vcpu_wake_not_runnable)          \
   96.15 -    _MACRO(dom_free)                        \
   96.16 +    _MACRO(dom_destroy)                     \
   96.17      _MACRO(schedule)                        \
   96.18      _MACRO(tickle_local_idler)              \
   96.19      _MACRO(tickle_local_over)               \
   96.20 @@ -429,14 +428,14 @@ static inline void
   96.21  }
   96.22  
   96.23  static int
   96.24 -csched_vcpu_alloc(struct vcpu *vc)
   96.25 +csched_vcpu_init(struct vcpu *vc)
   96.26  {
   96.27      struct domain * const dom = vc->domain;
   96.28      struct csched_dom *sdom;
   96.29      struct csched_vcpu *svc;
   96.30      int16_t pri;
   96.31  
   96.32 -    CSCHED_STAT_CRANK(vcpu_alloc);
   96.33 +    CSCHED_STAT_CRANK(vcpu_init);
   96.34  
   96.35      /* Allocate, if appropriate, per-domain info */
   96.36      if ( is_idle_vcpu(vc) )
   96.37 @@ -489,19 +488,13 @@ csched_vcpu_alloc(struct vcpu *vc)
   96.38      if ( likely(sdom != NULL) )
   96.39          csched_vcpu_acct(svc, 0);
   96.40  
   96.41 -    return 0;
   96.42 -}
   96.43 -
   96.44 -static void
   96.45 -csched_vcpu_add(struct vcpu *vc) 
   96.46 -{
   96.47 -    CSCHED_STAT_CRANK(vcpu_add);
   96.48 -
   96.49      /* Allocate per-PCPU info */
   96.50      if ( unlikely(!CSCHED_PCPU(vc->processor)) )
   96.51          csched_pcpu_init(vc->processor);
   96.52  
   96.53      CSCHED_VCPU_CHECK(vc);
   96.54 +
   96.55 +    return 0;
   96.56  }
   96.57  
   96.58  static void
   96.59 @@ -644,12 +637,12 @@ csched_dom_cntl(
   96.60  }
   96.61  
   96.62  static void
   96.63 -csched_dom_free(struct domain *dom)
   96.64 +csched_dom_destroy(struct domain *dom)
   96.65  {
   96.66      struct csched_dom * const sdom = CSCHED_DOM(dom);
   96.67      int i;
   96.68  
   96.69 -    CSCHED_STAT_CRANK(dom_free);
   96.70 +    CSCHED_STAT_CRANK(dom_destroy);
   96.71  
   96.72      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   96.73      {
   96.74 @@ -1215,14 +1208,15 @@ struct scheduler sched_credit_def = {
   96.75      .opt_name       = "credit",
   96.76      .sched_id       = SCHED_CREDIT,
   96.77  
   96.78 -    .alloc_task     = csched_vcpu_alloc,
   96.79 -    .add_task       = csched_vcpu_add,
   96.80 +    .init_vcpu      = csched_vcpu_init,
   96.81 +    .destroy_domain = csched_dom_destroy,
   96.82 +
   96.83      .sleep          = csched_vcpu_sleep,
   96.84      .wake           = csched_vcpu_wake,
   96.85 +
   96.86      .set_affinity   = csched_vcpu_set_affinity,
   96.87  
   96.88      .adjdom         = csched_dom_cntl,
   96.89 -    .free_task      = csched_dom_free,
   96.90  
   96.91      .tick           = csched_tick,
   96.92      .do_schedule    = csched_schedule,
    97.1 --- a/xen/common/sched_sedf.c	Wed Jun 07 11:03:15 2006 +0100
    97.2 +++ b/xen/common/sched_sedf.c	Wed Jun 07 11:03:51 2006 +0100
    97.3 @@ -328,11 +328,9 @@ static inline void __add_to_runqueue_sor
    97.4  }
    97.5  
    97.6  
    97.7 -/* Allocates memory for per domain private scheduling data*/
    97.8 -static int sedf_alloc_task(struct vcpu *v)
    97.9 +static int sedf_init_vcpu(struct vcpu *v)
   97.10  {
   97.11 -    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
   97.12 -          v->domain->domain_id, v->vcpu_id);
   97.13 +    struct sedf_vcpu_info *inf;
   97.14  
   97.15      if ( v->domain->sched_priv == NULL )
   97.16      {
   97.17 @@ -344,23 +342,11 @@ static int sedf_alloc_task(struct vcpu *
   97.18  
   97.19      if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
   97.20          return -1;
   97.21 -
   97.22      memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
   97.23  
   97.24 -    return 0;
   97.25 -}
   97.26 -
   97.27 -
   97.28 -/* Setup the sedf_dom_info */
   97.29 -static void sedf_add_task(struct vcpu *v)
   97.30 -{
   97.31 -    struct sedf_vcpu_info *inf = EDOM_INFO(v);
   97.32 -
   97.33 +    inf = EDOM_INFO(v);
   97.34      inf->vcpu = v;
   97.35   
   97.36 -    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",
   97.37 -          v->domain->domain_id, v->vcpu_id);
   97.38 -
   97.39      /* Allocate per-CPU context if this is the first domain to be added. */
   97.40      if ( unlikely(schedule_data[v->processor].sched_priv == NULL) )
   97.41      {
   97.42 @@ -408,15 +394,14 @@ static void sedf_add_task(struct vcpu *v
   97.43          EDOM_INFO(v)->deadl_abs = 0;
   97.44          EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
   97.45      }
   97.46 +
   97.47 +    return 0;
   97.48  }
   97.49  
   97.50 -/* Frees memory used by domain info */
   97.51 -static void sedf_free_task(struct domain *d)
   97.52 +static void sedf_destroy_domain(struct domain *d)
   97.53  {
   97.54      int i;
   97.55  
   97.56 -    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
   97.57 -
   97.58      xfree(d->sched_priv);
   97.59   
   97.60      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   97.61 @@ -1452,9 +1437,9 @@ struct scheduler sched_sedf_def = {
   97.62      .opt_name = "sedf",
   97.63      .sched_id = SCHED_SEDF,
   97.64      
   97.65 -    .alloc_task     = sedf_alloc_task,
   97.66 -    .add_task       = sedf_add_task,
   97.67 -    .free_task      = sedf_free_task,
   97.68 +    .init_vcpu      = sedf_init_vcpu,
   97.69 +    .destroy_domain = sedf_destroy_domain,
   97.70 +
   97.71      .do_schedule    = sedf_do_schedule,
   97.72      .dump_cpu_state = sedf_dump_cpu_state,
   97.73      .sleep          = sedf_sleep,
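[Editor's note: all three schedulers are converted from the old three-hook lifecycle (alloc_task plus add_task per vcpu, free_task per domain) to two hooks: init_vcpu, which allocates and enqueues in one step and can fail cleanly so alloc_vcpu() can back out, and destroy_domain. The common/schedule.c hunks below make the matching renames, folding the per-vcpu timer teardown of the old sched_rem_domain() into sched_destroy_domain(). The slimmed-down ops table, abbreviated to the fields shown in these diffs:

    struct scheduler {
        char *name, *opt_name;
        int  sched_id;
        int  (*init_vcpu)     (struct vcpu *v);      /* was alloc_task+add_task */
        void (*destroy_domain)(struct domain *d);    /* was free_task           */
        /* ... do_schedule, sleep, wake, etc. unchanged ... */
    };
]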
    98.1 --- a/xen/common/schedule.c	Wed Jun 07 11:03:15 2006 +0100
    98.2 +++ b/xen/common/schedule.c	Wed Jun 07 11:03:51 2006 +0100
    98.3 @@ -99,74 +99,7 @@ void vcpu_runstate_get(struct vcpu *v, s
    98.4      }
    98.5  }
    98.6  
    98.7 -struct domain *alloc_domain(void)
    98.8 -{
    98.9 -    struct domain *d;
   98.10 -
   98.11 -    if ( (d = xmalloc(struct domain)) != NULL )
   98.12 -        memset(d, 0, sizeof(*d));
   98.13 -
   98.14 -    return d;
   98.15 -}
   98.16 -
   98.17 -void free_domain(struct domain *d)
   98.18 -{
   98.19 -    struct vcpu *v;
   98.20 -    int i;
   98.21 -
   98.22 -    for_each_vcpu ( d, v )
   98.23 -        sched_rem_domain(v);
   98.24 -
   98.25 -    SCHED_OP(free_task, d);
   98.26 -
   98.27 -    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
   98.28 -        if ( (v = d->vcpu[i]) != NULL )
   98.29 -            free_vcpu_struct(v);
   98.30 -
   98.31 -    xfree(d);
   98.32 -}
   98.33 -
   98.34 -struct vcpu *alloc_vcpu(
   98.35 -    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
   98.36 -{
   98.37 -    struct vcpu *v;
   98.38 -
   98.39 -    BUG_ON(d->vcpu[vcpu_id] != NULL);
   98.40 -
   98.41 -    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
   98.42 -        return NULL;
   98.43 -
   98.44 -    v->domain = d;
   98.45 -    v->vcpu_id = vcpu_id;
   98.46 -    v->processor = cpu_id;
   98.47 -    atomic_set(&v->pausecnt, 0);
   98.48 -    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
   98.49 -
   98.50 -    v->cpu_affinity = is_idle_domain(d) ?
   98.51 -        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
   98.52 -
   98.53 -    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
   98.54 -    v->runstate.state_entry_time = NOW();
   98.55 -
   98.56 -    if ( (vcpu_id != 0) && !is_idle_domain(d) )
   98.57 -        set_bit(_VCPUF_down, &v->vcpu_flags);
   98.58 -
   98.59 -    if ( SCHED_OP(alloc_task, v) < 0 )
   98.60 -    {
   98.61 -        free_vcpu_struct(v);
   98.62 -        return NULL;
   98.63 -    }
   98.64 -
   98.65 -    d->vcpu[vcpu_id] = v;
   98.66 -    if ( vcpu_id != 0 )
   98.67 -        d->vcpu[v->vcpu_id-1]->next_in_list = v;
   98.68 -
   98.69 -    sched_add_domain(v);
   98.70 -
   98.71 -    return v;
   98.72 -}
   98.73 -
   98.74 -void sched_add_domain(struct vcpu *v) 
   98.75 +int sched_init_vcpu(struct vcpu *v) 
   98.76  {
   98.77      /* Initialise the per-domain timers. */
   98.78      init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
   98.79 @@ -179,17 +112,23 @@ void sched_add_domain(struct vcpu *v)
   98.80          set_bit(_VCPUF_running, &v->vcpu_flags);
   98.81      }
   98.82  
   98.83 -    SCHED_OP(add_task, v);
   98.84      TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
   98.85 +
   98.86 +    return SCHED_OP(init_vcpu, v);
   98.87  }
   98.88  
   98.89 -void sched_rem_domain(struct vcpu *v) 
   98.90 +void sched_destroy_domain(struct domain *d)
   98.91  {
   98.92 -    kill_timer(&v->timer);
   98.93 -    kill_timer(&v->poll_timer);
   98.94 +    struct vcpu *v;
   98.95  
   98.96 -    SCHED_OP(rem_task, v);
   98.97 -    TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
   98.98 +    for_each_vcpu ( d, v )
   98.99 +    {
  98.100 +        kill_timer(&v->timer);
  98.101 +        kill_timer(&v->poll_timer);
  98.102 +        TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
  98.103 +    }
  98.104 +
  98.105 +    SCHED_OP(destroy_domain, d);
  98.106  }
  98.107  
  98.108  void vcpu_sleep_nosync(struct vcpu *v)
  98.109 @@ -663,7 +602,7 @@ static void poll_timer_fn(void *data)
  98.110  /* Initialise the data structures. */
  98.111  void __init scheduler_init(void)
  98.112  {
  98.113 -    int i, rc;
  98.114 +    int i;
  98.115  
  98.116      open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);
  98.117  
  98.118 @@ -686,17 +625,6 @@ void __init scheduler_init(void)
  98.119  
  98.120      printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
  98.121      SCHED_OP(init);
  98.122 -
  98.123 -    if ( idle_vcpu[0] != NULL )
  98.124 -    {
  98.125 -        schedule_data[0].curr = idle_vcpu[0];
  98.126 -        schedule_data[0].idle = idle_vcpu[0];
  98.127 -
  98.128 -        rc = SCHED_OP(alloc_task, idle_vcpu[0]);
  98.129 -        BUG_ON(rc < 0);
  98.130 -
  98.131 -        sched_add_domain(idle_vcpu[0]);
  98.132 -    }
  98.133  }
  98.134  
  98.135  /*
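
With sched_init_vcpu() returning the per-scheduler status and sched_destroy_domain() walking every VCPU itself, the alloc_domain/alloc_vcpu logic removed above can live in common domain code and unwind cleanly on failure. A sketch of the intended call site (alloc_vcpu itself moved out of this file and is not shown in this hunk; the example_ name is illustrative):

    static struct vcpu *example_create_vcpu(struct domain *d,
                                            unsigned int vcpu_id,
                                            unsigned int cpu_id)
    {
        struct vcpu *v;

        if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
            return NULL;

        v->domain    = d;
        v->vcpu_id   = vcpu_id;
        v->processor = cpu_id;

        if ( sched_init_vcpu(v) < 0 )   /* per-scheduler init may fail... */
        {
            free_vcpu_struct(v);        /* ...so the caller can unwind    */
            return NULL;
        }

        return v;
    }
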
    99.1 --- a/xen/common/trace.c	Wed Jun 07 11:03:15 2006 +0100
    99.2 +++ b/xen/common/trace.c	Wed Jun 07 11:03:51 2006 +0100
    99.3 @@ -173,25 +173,17 @@ void init_trace_bufs(void)
    99.4   */
    99.5  int tb_control(dom0_tbufcontrol_t *tbc)
    99.6  {
    99.7 -    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
    99.8 +    static DEFINE_SPINLOCK(lock);
    99.9      int rc = 0;
   99.10  
   99.11      spin_lock(&lock);
   99.12  
   99.13 -    if ( !tb_init_done &&
   99.14 -         (tbc->op != DOM0_TBUF_SET_SIZE) &&
   99.15 -         (tbc->op != DOM0_TBUF_ENABLE) )
   99.16 -    {
   99.17 -        spin_unlock(&lock);
   99.18 -        return -EINVAL;
   99.19 -    }
   99.20 -
   99.21      switch ( tbc->op )
   99.22      {
   99.23      case DOM0_TBUF_GET_INFO:
   99.24          tbc->cpu_mask   = tb_cpu_mask;
   99.25          tbc->evt_mask   = tb_event_mask;
   99.26 -        tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT;
   99.27 +        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(t_bufs[0]) : 0UL;
   99.28          tbc->size       = opt_tbuf_size * PAGE_SIZE;
   99.29          break;
   99.30      case DOM0_TBUF_SET_CPU_MASK:
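
Two independent fixes in this hunk: static locks switch from the SPIN_LOCK_UNLOCKED struct initializer to the DEFINE_SPINLOCK() declare-and-initialize macro, and DOM0_TBUF_GET_INFO now reports a zero buffer_mfn when tracing has never been sized, instead of rejecting the call outright. The lock idiom in isolation, as used across this changeset:

    static DEFINE_SPINLOCK(example_lock);   /* declares and initializes */

    static void example_guarded_op(void)
    {
        spin_lock(&example_lock);
        /* ... critical section ... */
        spin_unlock(&example_lock);
    }
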
   100.1 --- a/xen/common/xmalloc.c	Wed Jun 07 11:03:15 2006 +0100
   100.2 +++ b/xen/common/xmalloc.c	Wed Jun 07 11:03:51 2006 +0100
   100.3 @@ -35,7 +35,7 @@
   100.4  #include <xen/prefetch.h>
   100.5  
   100.6  static LIST_HEAD(freelist);
   100.7 -static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED;
   100.8 +static DEFINE_SPINLOCK(freelist_lock);
   100.9  
  100.10  struct xmalloc_hdr
  100.11  {
   101.1 --- a/xen/drivers/char/console.c	Wed Jun 07 11:03:15 2006 +0100
   101.2 +++ b/xen/drivers/char/console.c	Wed Jun 07 11:03:51 2006 +0100
   101.3 @@ -53,7 +53,7 @@ static char printk_prefix[16] = "";
   101.4  static int sercon_handle = -1;
   101.5  static int vgacon_enabled = 0;
   101.6  
   101.7 -spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
   101.8 +static DEFINE_SPINLOCK(console_lock);
   101.9  
  101.10  /*
  101.11   * *******************************************************
  101.12 @@ -563,7 +563,7 @@ static char        *debugtrace_buf; /* D
  101.13  static unsigned int debugtrace_prd; /* Producer index     */
  101.14  static unsigned int debugtrace_kilobytes = 128, debugtrace_bytes;
  101.15  static unsigned int debugtrace_used;
  101.16 -static spinlock_t   debugtrace_lock = SPIN_LOCK_UNLOCKED;
  101.17 +static DEFINE_SPINLOCK(debugtrace_lock);
  101.18  integer_param("debugtrace", debugtrace_kilobytes);
  101.19  
  101.20  void debugtrace_dump(void)
  101.21 @@ -675,7 +675,7 @@ void panic(const char *fmt, ...)
  101.22      va_list args;
  101.23      char buf[128];
  101.24      unsigned long flags;
  101.25 -    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
  101.26 +    static DEFINE_SPINLOCK(lock);
  101.27      extern void machine_restart(char *);
  101.28      
  101.29      debugtrace_dump();
   102.1 --- a/xen/include/asm-x86/page.h	Wed Jun 07 11:03:15 2006 +0100
   102.2 +++ b/xen/include/asm-x86/page.h	Wed Jun 07 11:03:51 2006 +0100
   102.3 @@ -172,10 +172,13 @@ typedef struct { u32 pfn; } pagetable_t;
   102.4  /* x86_64 */
   102.5  typedef struct { u64 pfn; } pagetable_t;
   102.6  #endif
   102.7 -#define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT)
   102.8 -#define pagetable_get_pfn(x)   ((x).pfn)
   102.9 -#define mk_pagetable(pa)       \
  102.10 -    ({ pagetable_t __p; __p.pfn = (pa) >> PAGE_SHIFT; __p; })
  102.11 +#define pagetable_get_paddr(x)  ((paddr_t)(x).pfn << PAGE_SHIFT)
  102.12 +#define pagetable_get_pfn(x)    ((x).pfn)
  102.13 +#define pagetable_is_null(x)    ((x).pfn == 0)
  102.14 +#define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) })
  102.15 +#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
  102.16 +#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
  102.17 +#define pagetable_null()        pagetable_from_pfn(0)
  102.18  #endif
  102.19  
  102.20  #define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
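
The new pagetable_t constructors replace the statement-expression mk_pagetable() with a compound literal, pagetable_from_pfn(), and derive the paddr, page, and null variants from it. A usage sketch; the mfn value is illustrative:

    unsigned long mfn   = 0x1234;
    paddr_t       paddr = (paddr_t)mfn << PAGE_SHIFT;

    pagetable_t pt_a = pagetable_from_pfn(mfn);      /* the primitive         */
    pagetable_t pt_b = pagetable_from_paddr(paddr);  /* paddr >> PAGE_SHIFT   */
    pagetable_t pt_c = pagetable_null();             /* sentinel: pfn 0       */

    if ( pagetable_is_null(pt_c) )
        /* cleared/never-set pagetable detected */ ;

    /* Round trip holds because both directions shift by PAGE_SHIFT. */
    ASSERT(pagetable_get_paddr(pt_b) == paddr);
    ASSERT(pagetable_get_pfn(pt_a) == mfn);
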
   103.1 --- a/xen/include/public/arch-ia64.h	Wed Jun 07 11:03:15 2006 +0100
   103.2 +++ b/xen/include/public/arch-ia64.h	Wed Jun 07 11:03:51 2006 +0100
   103.3 @@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(char);
   103.4  DEFINE_XEN_GUEST_HANDLE(int);
   103.5  DEFINE_XEN_GUEST_HANDLE(long);
   103.6  DEFINE_XEN_GUEST_HANDLE(void);
   103.7 +
   103.8 +typedef unsigned long xen_pfn_t;
   103.9 +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  103.10  #endif
  103.11  
  103.12  /* Arch specific VIRQs definition */
   104.1 --- a/xen/include/public/arch-x86_32.h	Wed Jun 07 11:03:15 2006 +0100
   104.2 +++ b/xen/include/public/arch-x86_32.h	Wed Jun 07 11:03:51 2006 +0100
   104.3 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(char);
   104.4  DEFINE_XEN_GUEST_HANDLE(int);
   104.5  DEFINE_XEN_GUEST_HANDLE(long);
   104.6  DEFINE_XEN_GUEST_HANDLE(void);
   104.7 +
   104.8 +typedef unsigned long xen_pfn_t;
   104.9 +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  104.10  #endif
  104.11  
  104.12  /*
  104.13 @@ -138,9 +141,17 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  104.14  struct vcpu_guest_context {
  104.15      /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
  104.16      struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
  104.17 -#define VGCF_I387_VALID (1<<0)
  104.18 -#define VGCF_HVM_GUEST  (1<<1)
  104.19 -#define VGCF_IN_KERNEL  (1<<2)
  104.20 +#define VGCF_I387_VALID                (1<<0)
  104.21 +#define VGCF_HVM_GUEST                 (1<<1)
  104.22 +#define VGCF_IN_KERNEL                 (1<<2)
  104.23 +#define _VGCF_i387_valid               0
  104.24 +#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
  104.25 +#define _VGCF_hvm_guest                1
  104.26 +#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
  104.27 +#define _VGCF_in_kernel                2
  104.28 +#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
  104.29 +#define _VGCF_failsafe_disables_events 3
  104.30 +#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
  104.31      unsigned long flags;                    /* VGCF_* flags                 */
  104.32      struct cpu_user_regs user_regs;         /* User-level CPU registers     */
  104.33      struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
  104.34 @@ -158,10 +169,18 @@ struct vcpu_guest_context {
  104.35  typedef struct vcpu_guest_context vcpu_guest_context_t;
  104.36  DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
  104.37  
  104.38 +/*
  104.39 + * Page-directory addresses above 4GB do not fit into architectural %cr3.
  104.40 + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
  104.41 + * must use the following accessor macros to pack/unpack valid MFNs.
  104.42 + */
  104.43 +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
  104.44 +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
  104.45 +
  104.46  struct arch_shared_info {
  104.47      unsigned long max_pfn;                  /* max pfn that appears in table */
  104.48      /* Frame containing list of mfns containing list of mfns containing p2m. */
  104.49 -    unsigned long pfn_to_mfn_frame_list_list;
  104.50 +    xen_pfn_t     pfn_to_mfn_frame_list_list;
  104.51      unsigned long nmi_reason;
  104.52  };
  104.53  typedef struct arch_shared_info arch_shared_info_t;
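
The x86/32 accessors are a 12-bit rotate: because %cr3 is page-aligned, its low 12 bits are free to carry pfn bits 20-31, letting a 32-bit field name frames above 4GB. A stand-alone worked example, assuming 32-bit unsigned arithmetic:

    #include <assert.h>

    /* MFN 0x123456 names the frame at physical 0x123456000, above 4GB,
     * so a plain `pfn << 12` would overflow a 32-bit %cr3 image. */
    #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
    #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))

    int main(void)
    {
        unsigned pfn = 0x123456;
        unsigned cr3 = xen_pfn_to_cr3(pfn);  /* 0x23456001: pfn bits 20-31
                                              * land in cr3 bits 0-11      */
        assert(cr3 == 0x23456001);
        assert(xen_cr3_to_pfn(cr3) == pfn);  /* the 12-bit rotate inverts  */
        return 0;
    }
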
   105.1 --- a/xen/include/public/arch-x86_64.h	Wed Jun 07 11:03:15 2006 +0100
   105.2 +++ b/xen/include/public/arch-x86_64.h	Wed Jun 07 11:03:51 2006 +0100
   105.3 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(char);
   105.4  DEFINE_XEN_GUEST_HANDLE(int);
   105.5  DEFINE_XEN_GUEST_HANDLE(long);
   105.6  DEFINE_XEN_GUEST_HANDLE(void);
   105.7 +
   105.8 +typedef unsigned long xen_pfn_t;
   105.9 +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  105.10  #endif
  105.11  
  105.12  /*
  105.13 @@ -211,9 +214,19 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  105.14  struct vcpu_guest_context {
  105.15      /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
  105.16      struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
  105.17 -#define VGCF_I387_VALID (1<<0)
  105.18 -#define VGCF_HVM_GUEST  (1<<1)
  105.19 -#define VGCF_IN_KERNEL  (1<<2)
  105.20 +#define VGCF_I387_VALID                (1<<0)
  105.21 +#define VGCF_HVM_GUEST                 (1<<1)
  105.22 +#define VGCF_IN_KERNEL                 (1<<2)
  105.23 +#define _VGCF_i387_valid               0
  105.24 +#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
  105.25 +#define _VGCF_hvm_guest                1
  105.26 +#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
  105.27 +#define _VGCF_in_kernel                2
  105.28 +#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
  105.29 +#define _VGCF_failsafe_disables_events 3
  105.30 +#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
  105.31 +#define _VGCF_syscall_disables_events  4
  105.32 +#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
  105.33      unsigned long flags;                    /* VGCF_* flags                 */
  105.34      struct cpu_user_regs user_regs;         /* User-level CPU registers     */
  105.35      struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
  105.36 @@ -234,10 +247,13 @@ struct vcpu_guest_context {
  105.37  typedef struct vcpu_guest_context vcpu_guest_context_t;
  105.38  DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
  105.39  
  105.40 +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
  105.41 +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
  105.42 +
  105.43  struct arch_shared_info {
  105.44      unsigned long max_pfn;                  /* max pfn that appears in table */
  105.45      /* Frame containing list of mfns containing list of mfns containing p2m. */
  105.46 -    unsigned long pfn_to_mfn_frame_list_list;
  105.47 +    xen_pfn_t     pfn_to_mfn_frame_list_list;
  105.48      unsigned long nmi_reason;
  105.49  };
  105.50  typedef struct arch_shared_info arch_shared_info_t;
   106.1 --- a/xen/include/public/callback.h	Wed Jun 07 11:03:15 2006 +0100
   106.2 +++ b/xen/include/public/callback.h	Wed Jun 07 11:03:51 2006 +0100
   106.3 @@ -29,12 +29,20 @@
   106.4  #define CALLBACKTYPE_nmi                   4
   106.5  
   106.6  /*
    106.7 + * Disable event delivery during callback? This flag is ignored for event and
   106.8 + * NMI callbacks: event delivery is unconditionally disabled.
   106.9 + */
  106.10 +#define _CALLBACKF_mask_events             0
  106.11 +#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
  106.12 +
  106.13 +/*
  106.14   * Register a callback.
  106.15   */
  106.16  #define CALLBACKOP_register                0
  106.17  struct callback_register {
  106.18 -     int type;
  106.19 -     xen_callback_t address;
  106.20 +    uint16_t type;
  106.21 +    uint16_t flags;
  106.22 +    xen_callback_t address;
  106.23  };
  106.24  typedef struct callback_register callback_register_t;
  106.25  DEFINE_XEN_GUEST_HANDLE(callback_register_t);
  106.26 @@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
  106.27   */
  106.28  #define CALLBACKOP_unregister              1
  106.29  struct callback_unregister {
  106.30 -     int type;
  106.31 +    uint16_t type;
  106.32 +    uint16_t _unused;
  106.33  };
  106.34  typedef struct callback_unregister callback_unregister_t;
  106.35  DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
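
callback_register now carries a 16-bit flags word next to a 16-bit type, and CALLBACKF_mask_events asks the hypervisor to disable event delivery while the callback runs (meaningful for, e.g., failsafe callbacks, per the comment above). A registration sketch; CALLBACKTYPE_failsafe and the HYPERVISOR_callback_op() guest wrapper are assumed from the surrounding interface, not shown in this hunk:

    struct callback_register cb = {
        .type  = CALLBACKTYPE_failsafe,    /* assumed from this header      */
        .flags = CALLBACKF_mask_events,    /* mask events on callback entry */
        /* .address is the arch-specific xen_callback_t entry point */
    };

    if ( HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 )
        /* older hypervisor: fall back to the pre-flags behaviour */ ;
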
   107.1 --- a/xen/include/public/dom0_ops.h	Wed Jun 07 11:03:15 2006 +0100
   107.2 +++ b/xen/include/public/dom0_ops.h	Wed Jun 07 11:03:51 2006 +0100
   107.3 @@ -19,7 +19,7 @@
   107.4   * This makes sure that old versions of dom0 tools will stop working in a
   107.5   * well-defined way (rather than crashing the machine, for instance).
   107.6   */
   107.7 -#define DOM0_INTERFACE_VERSION   0x03000000
   107.8 +#define DOM0_INTERFACE_VERSION   0x03000001
   107.9  
  107.10  /************************************************************************/
  107.11  
  107.12 @@ -27,10 +27,10 @@
  107.13  struct dom0_getmemlist {
  107.14      /* IN variables. */
  107.15      domid_t       domain;
  107.16 -    unsigned long max_pfns;
  107.17 -    XEN_GUEST_HANDLE(ulong) buffer;
  107.18 +    uint64_t max_pfns;
  107.19 +    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
  107.20      /* OUT variables. */
  107.21 -    unsigned long num_pfns;
  107.22 +    uint64_t num_pfns;
  107.23  };
  107.24  typedef struct dom0_getmemlist dom0_getmemlist_t;
  107.25  DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
  107.26 @@ -96,9 +96,9 @@ struct dom0_getdomaininfo {
  107.27  #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code.  */
  107.28  #define DOMFLAGS_SHUTDOWNSHIFT 16
  107.29      uint32_t flags;
  107.30 -    unsigned long tot_pages;
  107.31 -    unsigned long max_pages;
  107.32 -    unsigned long shared_info_frame;       /* MFN of shared_info struct */
  107.33 +    uint64_t tot_pages;
  107.34 +    uint64_t max_pages;
  107.35 +    xen_pfn_t shared_info_frame;  /* MFN of shared_info struct */
  107.36      uint64_t cpu_time;
  107.37      uint32_t nr_online_vcpus;     /* Number of VCPUs currently online. */
  107.38      uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
  107.39 @@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
  107.40  
  107.41  struct dom0_getpageframeinfo {
  107.42      /* IN variables. */
  107.43 -    unsigned long mfn;     /* Machine page frame number to query.       */
  107.44 +    xen_pfn_t mfn;         /* Machine page frame number to query.       */
  107.45      domid_t domain;        /* To which domain does the frame belong?    */
  107.46      /* OUT variables. */
  107.47      /* Is the page PINNED to a type? */
  107.48 @@ -213,7 +213,7 @@ struct dom0_tbufcontrol {
  107.49      cpumap_t      cpu_mask;
  107.50      uint32_t      evt_mask;
  107.51      /* OUT variables */
  107.52 -    unsigned long buffer_mfn;
  107.53 +    xen_pfn_t buffer_mfn;
  107.54      uint32_t size;
  107.55  };
  107.56  typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
  107.57 @@ -229,8 +229,8 @@ struct dom0_physinfo {
  107.58      uint32_t sockets_per_node;
  107.59      uint32_t nr_nodes;
  107.60      uint32_t cpu_khz;
  107.61 -    unsigned long total_pages;
  107.62 -    unsigned long free_pages;
  107.63 +    uint64_t total_pages;
  107.64 +    uint64_t free_pages;
  107.65      uint32_t hw_cap[8];
  107.66  };
  107.67  typedef struct dom0_physinfo dom0_physinfo_t;
  107.68 @@ -276,7 +276,7 @@ struct dom0_shadow_control {
  107.69      uint32_t       op;
  107.70      XEN_GUEST_HANDLE(ulong) dirty_bitmap;
  107.71      /* IN/OUT variables. */
  107.72 -    unsigned long  pages;        /* size of buffer, updated with actual size */
  107.73 +    uint64_t       pages;        /* size of buffer, updated with actual size */
  107.74      /* OUT variables. */
  107.75      struct dom0_shadow_control_stats stats;
  107.76  };
  107.77 @@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont
  107.78  #define DOM0_SETDOMAINMAXMEM   28
  107.79  struct dom0_setdomainmaxmem {
  107.80      /* IN variables. */
  107.81 -    domid_t       domain;
  107.82 -    unsigned long max_memkb;
  107.83 +    domid_t  domain;
  107.84 +    uint64_t max_memkb;
  107.85  };
  107.86  typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
  107.87  DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
  107.88 @@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma
  107.89  #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
  107.90  struct dom0_getpageframeinfo2 {
  107.91      /* IN variables. */
  107.92 -    domid_t        domain;
  107.93 -    unsigned long  num;
  107.94 +    domid_t  domain;
  107.95 +    uint64_t num;
  107.96      /* IN/OUT variables. */
  107.97      XEN_GUEST_HANDLE(ulong) array;
  107.98  };
  107.99 @@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
 107.100  #define DOM0_ADD_MEMTYPE         31
 107.101  struct dom0_add_memtype {
 107.102      /* IN variables. */
 107.103 -    unsigned long mfn;
 107.104 -    unsigned long nr_mfns;
 107.105 -    uint32_t      type;
 107.106 +    xen_pfn_t mfn;
 107.107 +    uint64_t nr_mfns;
 107.108 +    uint32_t type;
 107.109      /* OUT variables. */
 107.110 -    uint32_t      handle;
 107.111 -    uint32_t      reg;
 107.112 +    uint32_t handle;
 107.113 +    uint32_t reg;
 107.114  };
 107.115  typedef struct dom0_add_memtype dom0_add_memtype_t;
 107.116  DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
 107.117 @@ -345,8 +345,8 @@ struct dom0_read_memtype {
 107.118      /* IN variables. */
 107.119      uint32_t reg;
 107.120      /* OUT variables. */
 107.121 -    unsigned long mfn;
 107.122 -    unsigned long nr_mfns;
 107.123 +    xen_pfn_t mfn;
 107.124 +    uint64_t nr_mfns;
 107.125      uint32_t type;
 107.126  };
 107.127  typedef struct dom0_read_memtype dom0_read_memtype_t;
 107.128 @@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss
 107.129  #define DOM0_IOMEM_PERMISSION 47
 107.130  struct dom0_iomem_permission {
 107.131      domid_t  domain;          /* domain to be affected */
 107.132 -    unsigned long first_mfn;  /* first page (physical page number) in range */
 107.133 -    unsigned long nr_mfns;    /* number of pages in range (>0) */
 107.134 +    xen_pfn_t first_mfn;      /* first page (physical page number) in range */
 107.135 +    uint64_t nr_mfns;         /* number of pages in range (>0) */
 107.136      uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
 107.137  };
 107.138  typedef struct dom0_iomem_permission dom0_iomem_permission_t;
 107.139 @@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi
 107.140  #define DOM0_HYPERCALL_INIT   48
 107.141  struct dom0_hypercall_init {
 107.142      domid_t  domain;          /* domain to be affected */
 107.143 -    unsigned long mfn;        /* machine frame to be initialised */
 107.144 +    xen_pfn_t mfn;            /* machine frame to be initialised */
 107.145  };
 107.146  typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 107.147  DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
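
The interface-version bump to 0x03000001 matches the ABI-visible retyping in this file: unsigned long fields become uint64_t or xen_pfn_t so that 32-bit tools and a 64-bit hypervisor compute identical structure layouts. A stand-alone illustration of the failure mode being removed (struct names hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* An ILP32 toolstack and an LP64 hypervisor disagree on sizeof(long),
     * so every field after a `long` shifts; fixed-width types do not. */
    struct old_physinfo { unsigned long total_pages, free_pages; };
    struct new_physinfo { uint64_t      total_pages, free_pages; };

    int main(void)
    {
        printf("old: %zu bytes\n", sizeof(struct old_physinfo)); /* 8 or 16  */
        printf("new: %zu bytes\n", sizeof(struct new_physinfo)); /* always 16 */
        return 0;
    }
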
   108.1 --- a/xen/include/public/grant_table.h	Wed Jun 07 11:03:15 2006 +0100
   108.2 +++ b/xen/include/public/grant_table.h	Wed Jun 07 11:03:51 2006 +0100
   108.3 @@ -240,7 +240,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
   108.4  #define GNTTABOP_transfer                4
   108.5  struct gnttab_transfer {
   108.6      /* IN parameters. */
   108.7 -    unsigned long mfn;
   108.8 +    xen_pfn_t     mfn;
   108.9      domid_t       domid;
  108.10      grant_ref_t   ref;
  108.11      /* OUT parameters. */
   109.1 --- a/xen/include/public/io/netif.h	Wed Jun 07 11:03:15 2006 +0100
   109.2 +++ b/xen/include/public/io/netif.h	Wed Jun 07 11:03:51 2006 +0100
   109.3 @@ -27,6 +27,10 @@
   109.4  #define _NETTXF_data_validated (1)
   109.5  #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
   109.6  
    109.7 +/* Packet continues in the next request. */
   109.8 +#define _NETTXF_more_data      (2)
   109.9 +#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
  109.10 +
  109.11  struct netif_tx_request {
  109.12      grant_ref_t gref;      /* Reference to buffer page */
  109.13      uint16_t offset;       /* Offset within buffer page */
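
NETTXF_more_data lets a transmit packet span several consecutive ring requests: every fragment but the last sets the flag. A hypothetical frontend-side sketch; the ring variable, slot index, grant references, and the flags field beyond this hunk are assumed:

    struct netif_tx_request *req;

    req = RING_GET_REQUEST(&tx_ring, prod++);
    req->gref   = gref_frag0;            /* first fragment               */
    req->offset = 0;
    req->flags  = NETTXF_more_data;      /* packet continues in next slot */

    req = RING_GET_REQUEST(&tx_ring, prod++);
    req->gref   = gref_frag1;            /* final fragment               */
    req->offset = 0;
    req->flags  = 0;                     /* no more_data: packet ends    */
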
   110.1 --- a/xen/include/public/io/ring.h	Wed Jun 07 11:03:15 2006 +0100
   110.2 +++ b/xen/include/public/io/ring.h	Wed Jun 07 11:03:51 2006 +0100
   110.3 @@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name
   110.4  #define RING_SIZE(_r)                                                   \
   110.5      ((_r)->nr_ents)
   110.6  
   110.7 +/* Number of free requests (for use on front side only). */
   110.8 +#define RING_FREE_REQUESTS(_r)						\
   110.9 +    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
  110.10 +
  110.11  /* Test if there is an empty slot available on the front ring.
   110.12  * (This is only meaningful from the front.)
  110.13   */
  110.14  #define RING_FULL(_r)                                                   \
  110.15 -    (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
  110.16 +    (RING_FREE_REQUESTS(_r) == 0)
  110.17  
  110.18  /* Test if there are outstanding messages to be processed on a ring. */
  110.19  #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
  110.20 -    ((_r)->rsp_cons != (_r)->sring->rsp_prod)
  110.21 +    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
  110.22  
  110.23  #define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
  110.24 -    (((_r)->req_cons != (_r)->sring->req_prod) &&                       \
  110.25 -     (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
  110.26 +    ({									\
  110.27 +	unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;	\
  110.28 +	unsigned int rsp = RING_SIZE(_r) -				\
  110.29 +			   ((_r)->req_cons - (_r)->rsp_prod_pvt);	\
  110.30 +	req < rsp ? req : rsp;						\
  110.31 +    })
  110.32  
  110.33  /* Direct access to individual ring elements, by index. */
  110.34  #define RING_GET_REQUEST(_r, _idx)                                      \
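
The rewritten macros lean on free-running unsigned indices: producer minus consumer is the unconsumed count even across 2^32 wraparound, RING_HAS_UNCONSUMED_RESPONSES now yields that count directly (still truthy/falsy as before), and RING_HAS_UNCONSUMED_REQUESTS caps the request count by the remaining response headroom. A stand-alone arithmetic check:

    #include <assert.h>

    /* Unsigned subtraction is modular, so the in-flight count is right
     * even after the producer index wraps past 2^32. */
    int main(void)
    {
        unsigned int ring_size    = 8;
        unsigned int req_prod_pvt = 5;            /* wrapped past 2^32... */
        unsigned int rsp_cons     = 0xfffffffeu;  /* ...consumer has not  */

        unsigned int in_flight = req_prod_pvt - rsp_cons;  /* 5 - (-2) = 7 */
        assert(in_flight == 7);
        assert(ring_size - in_flight == 1);       /* RING_FREE_REQUESTS   */
        return 0;
    }
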
   111.1 --- a/xen/include/public/memory.h	Wed Jun 07 11:03:15 2006 +0100
   111.2 +++ b/xen/include/public/memory.h	Wed Jun 07 11:03:51 2006 +0100
   111.3 @@ -29,7 +29,7 @@ struct xen_memory_reservation {
   111.4       *   OUT: GMFN bases of extents that were allocated
   111.5       *   (NB. This command also updates the mach_to_phys translation table)
   111.6       */
   111.7 -    XEN_GUEST_HANDLE(ulong) extent_start;
   111.8 +    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
   111.9  
  111.10      /* Number of extents, and size/alignment of each (2^extent_order pages). */
  111.11      unsigned long  nr_extents;
  111.12 @@ -87,7 +87,7 @@ struct xen_machphys_mfn_list {
  111.13       * any large discontiguities in the machine address space, 2MB gaps in
  111.14       * the machphys table will be represented by an MFN base of zero.
  111.15       */
  111.16 -    XEN_GUEST_HANDLE(ulong) extent_start;
  111.17 +    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
  111.18  
  111.19      /*
  111.20       * Number of extents written to the above array. This will be smaller
  111.21 @@ -117,7 +117,7 @@ struct xen_add_to_physmap {
  111.22      unsigned long idx;
  111.23  
  111.24      /* GPFN where the source mapping page should appear. */
  111.25 -    unsigned long gpfn;
  111.26 +    xen_pfn_t     gpfn;
  111.27  };
  111.28  typedef struct xen_add_to_physmap xen_add_to_physmap_t;
  111.29  DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
  111.30 @@ -135,13 +135,13 @@ struct xen_translate_gpfn_list {
  111.31      unsigned long nr_gpfns;
  111.32  
  111.33      /* List of GPFNs to translate. */
  111.34 -    XEN_GUEST_HANDLE(ulong) gpfn_list;
  111.35 +    XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
  111.36  
  111.37      /*
  111.38       * Output list to contain MFN translations. May be the same as the input
  111.39       * list (in which case each input GPFN is overwritten with the output MFN).
  111.40       */
  111.41 -    XEN_GUEST_HANDLE(ulong) mfn_list;
  111.42 +    XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
  111.43  };
  111.44  typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
  111.45  DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
   112.1 --- a/xen/include/public/xen.h	Wed Jun 07 11:03:15 2006 +0100
   112.2 +++ b/xen/include/public/xen.h	Wed Jun 07 11:03:51 2006 +0100
   112.3 @@ -197,7 +197,7 @@ struct mmuext_op {
   112.4      unsigned int cmd;
   112.5      union {
   112.6          /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
   112.7 -        unsigned long mfn;
   112.8 +        xen_pfn_t     mfn;
   112.9          /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
  112.10          unsigned long linear_addr;
  112.11      } arg1;
  112.12 @@ -234,10 +234,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
  112.13   */
  112.14  #define VMASST_CMD_enable                0
  112.15  #define VMASST_CMD_disable               1
  112.16 +
  112.17 +/* x86/32 guests: simulate full 4GB segment limits. */
  112.18  #define VMASST_TYPE_4gb_segments         0
  112.19 +
   112.20 +/* x86/32 guests: trap (vector 15) whenever the above vmassist is used. */
  112.21  #define VMASST_TYPE_4gb_segments_notify  1
  112.22 +
  112.23 +/*
  112.24 + * x86 guests: support writes to bottom-level PTEs.
  112.25 + * NB1. Page-directory entries cannot be written.
  112.26 + * NB2. Guest must continue to remove all writable mappings of PTEs.
  112.27 + */
  112.28  #define VMASST_TYPE_writable_pagetables  2
  112.29 -#define MAX_VMASST_TYPE 2
  112.30 +
  112.31 +/* x86/PAE guests: support PDPTs above 4GB. */
  112.32 +#define VMASST_TYPE_pae_extended_cr3     3
  112.33 +
  112.34 +#define MAX_VMASST_TYPE                  3
  112.35  
  112.36  #ifndef __ASSEMBLY__
  112.37  
  112.38 @@ -443,9 +457,9 @@ struct start_info {
  112.39      unsigned long nr_pages;     /* Total pages allocated to this domain.  */
  112.40      unsigned long shared_info;  /* MACHINE address of shared info struct. */
  112.41      uint32_t flags;             /* SIF_xxx flags.                         */
  112.42 -    unsigned long store_mfn;    /* MACHINE page number of shared page.    */
  112.43 +    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
  112.44      uint32_t store_evtchn;      /* Event channel for store communication. */
  112.45 -    unsigned long console_mfn;  /* MACHINE address of console page.       */
  112.46 +    xen_pfn_t console_mfn;      /* MACHINE page number of console page.   */
  112.47      uint32_t console_evtchn;    /* Event channel for console messages.    */
  112.48      /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
  112.49      unsigned long pt_base;      /* VIRTUAL address of page directory.     */
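
VMASST_TYPE_pae_extended_cr3 is the opt-in by which an x86/PAE guest declares it can handle page-directory frames above 4GB, pairing with the xen_pfn_to_cr3()/xen_cr3_to_pfn() packing macros earlier in this changeset. A guest-side sketch; HYPERVISOR_vm_assist() is the conventional hypercall wrapper and is assumed here:

    /* PAE guest opting in to extended-cr3 during early boot. */
    if ( HYPERVISOR_vm_assist(VMASST_CMD_enable,
                              VMASST_TYPE_pae_extended_cr3) != 0 )
    {
        /* Hypervisor predates the assist: keep all page directories
         * in memory below the 4GB boundary. */
    }
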
   113.1 --- a/xen/include/xen/console.h	Wed Jun 07 11:03:15 2006 +0100
   113.2 +++ b/xen/include/xen/console.h	Wed Jun 07 11:03:51 2006 +0100
   113.3 @@ -10,8 +10,6 @@
   113.4  #include <xen/spinlock.h>
   113.5  #include <xen/guest_access.h>
   113.6  
   113.7 -extern spinlock_t console_lock;
   113.8 -
   113.9  void set_printk_prefix(const char *prefix);
  113.10  
  113.11  long read_console_ring(XEN_GUEST_HANDLE(char), u32 *, int);
   114.1 --- a/xen/include/xen/domain.h	Wed Jun 07 11:03:15 2006 +0100
   114.2 +++ b/xen/include/xen/domain.h	Wed Jun 07 11:03:51 2006 +0100
   114.3 @@ -2,8 +2,14 @@
   114.4  #ifndef __XEN_DOMAIN_H__
   114.5  #define __XEN_DOMAIN_H__
   114.6  
   114.7 -extern int boot_vcpu(
   114.8 +struct vcpu *alloc_vcpu(
   114.9 +    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
  114.10 +int boot_vcpu(
  114.11      struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt);
  114.12 +struct vcpu *alloc_idle_vcpu(unsigned int cpu_id);
  114.13 +
  114.14 +struct domain *alloc_domain(domid_t domid);
  114.15 +void free_domain(struct domain *d);
  114.16  
  114.17  /*
  114.18   * Arch-specifics.
  114.19 @@ -11,19 +17,18 @@ extern int boot_vcpu(
  114.20  
  114.21  struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id);
  114.22  
  114.23 -extern void free_vcpu_struct(struct vcpu *v);
  114.24 +void free_vcpu_struct(struct vcpu *v);
  114.25  
  114.26 -extern int arch_domain_create(struct domain *d);
  114.27 +int arch_domain_create(struct domain *d);
  114.28  
  114.29 -extern void arch_domain_destroy(struct domain *d);
  114.30 +void arch_domain_destroy(struct domain *d);
  114.31  
  114.32 -extern int arch_set_info_guest(
  114.33 -    struct vcpu *v, struct vcpu_guest_context *c);
  114.34 +int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c);
  114.35 +
  114.36 +void domain_relinquish_resources(struct domain *d);
  114.37  
  114.38 -extern void domain_relinquish_resources(struct domain *d);
  114.39 +void dump_pageframe_info(struct domain *d);
  114.40  
  114.41 -extern void dump_pageframe_info(struct domain *d);
  114.42 -
  114.43 -extern void arch_dump_domain_info(struct domain *d);
  114.44 +void arch_dump_domain_info(struct domain *d);
  114.45  
  114.46  #endif /* __XEN_DOMAIN_H__ */
   115.1 --- a/xen/include/xen/sched-if.h	Wed Jun 07 11:03:15 2006 +0100
   115.2 +++ b/xen/include/xen/sched-if.h	Wed Jun 07 11:03:51 2006 +0100
   115.3 @@ -60,14 +60,17 @@ struct scheduler {
   115.4  
   115.5      void         (*init)           (void);
   115.6      void         (*tick)           (unsigned int cpu);
   115.7 -    int          (*alloc_task)     (struct vcpu *);
   115.8 -    void         (*add_task)       (struct vcpu *);
   115.9 -    void         (*free_task)      (struct domain *);
  115.10 -    void         (*rem_task)       (struct vcpu *);
  115.11 +
  115.12 +    int          (*init_vcpu)      (struct vcpu *);
  115.13 +    void         (*destroy_domain) (struct domain *);
  115.14 +
  115.15      void         (*sleep)          (struct vcpu *);
  115.16      void         (*wake)           (struct vcpu *);
  115.17 +
  115.18      int          (*set_affinity)   (struct vcpu *, cpumask_t *);
  115.19 +
  115.20      struct task_slice (*do_schedule) (s_time_t);
  115.21 +
  115.22      int          (*control)        (struct sched_ctl_cmd *);
  115.23      int          (*adjdom)         (struct domain *,
  115.24                                      struct sched_adjdom_cmd *);
   116.1 --- a/xen/include/xen/sched.h	Wed Jun 07 11:03:15 2006 +0100
   116.2 +++ b/xen/include/xen/sched.h	Wed Jun 07 11:03:51 2006 +0100
   116.3 @@ -186,12 +186,6 @@ extern struct vcpu *idle_vcpu[NR_CPUS];
   116.4  #define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
   116.5  #define is_idle_vcpu(v)   (is_idle_domain((v)->domain))
   116.6  
   116.7 -struct vcpu *alloc_vcpu(
   116.8 -    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
   116.9 -
  116.10 -struct domain *alloc_domain(void);
  116.11 -void free_domain(struct domain *d);
  116.12 -
  116.13  #define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
  116.14  #define put_domain(_d) \
  116.15    if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)
  116.16 @@ -226,7 +220,7 @@ static inline void get_knownalive_domain
  116.17  }
  116.18  
  116.19  extern struct domain *domain_create(
  116.20 -    domid_t dom_id, unsigned int cpu);
  116.21 +    domid_t domid, unsigned int cpu);
  116.22  extern int construct_dom0(
  116.23      struct domain *d,
  116.24      unsigned long image_start, unsigned long image_len, 
  116.25 @@ -269,8 +263,8 @@ void new_thread(struct vcpu *d,
  116.26  #define set_current_state(_s) do { current->state = (_s); } while (0)
  116.27  void scheduler_init(void);
  116.28  void schedulers_start(void);
  116.29 -void sched_add_domain(struct vcpu *);
  116.30 -void sched_rem_domain(struct vcpu *);
  116.31 +int  sched_init_vcpu(struct vcpu *);
  116.32 +void sched_destroy_domain(struct domain *);
  116.33  long sched_ctl(struct sched_ctl_cmd *);
  116.34  long sched_adjdom(struct sched_adjdom_cmd *);
  116.35  int  sched_id(void);