direct-io.hg

changeset 11446:1bab7d65171b

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Fri Sep 01 13:04:02 2006 -0600 (2006-09-01)
parents 4ba098226429 3e6325b73474
children 9fed76231248
files xen/arch/powerpc/htab.c xen/include/asm-ia64/mm.h xen/include/public/arch-ia64.h
line diff
     1.1 --- a/.hgignore	Fri Sep 01 12:52:12 2006 -0600
     1.2 +++ b/.hgignore	Fri Sep 01 13:04:02 2006 -0600
     1.3 @@ -203,6 +203,8 @@
     1.4  ^xen/arch/powerpc/firmware$
     1.5  ^xen/arch/powerpc/firmware_image$
     1.6  ^xen/arch/powerpc/xen\.lds$
     1.7 +^xen/arch/powerpc/.xen-syms$
     1.8 +^xen/arch/powerpc/xen-syms.S$
     1.9  ^unmodified_drivers/linux-2.6/\.tmp_versions
    1.10  ^unmodified_drivers/linux-2.6/.*\.cmd$
    1.11  ^unmodified_drivers/linux-2.6/.*\.ko$
     2.1 --- a/extras/mini-os/Makefile	Fri Sep 01 12:52:12 2006 -0600
     2.2 +++ b/extras/mini-os/Makefile	Fri Sep 01 13:04:02 2006 -0600
     2.3 @@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk
     2.4  # Set TARGET_ARCH
     2.5  override TARGET_ARCH     := $(XEN_TARGET_ARCH)
     2.6  
     2.7 +XEN_INTERFACE_VERSION := 0x00030203
     2.8 +
     2.9  # NB. '-Wcast-qual' is nasty, so I omitted it.
    2.10  CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
    2.11  CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
    2.12 +CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION)
    2.13  
    2.14  ASFLAGS = -D__ASSEMBLY__
    2.15  
     3.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Fri Sep 01 12:52:12 2006 -0600
     3.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Fri Sep 01 13:04:02 2006 -0600
     3.3 @@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e
     3.4  			 *  so we try it repeatedly and let the resource manager
     3.5  			 *  test it.
     3.6  			 */
     3.7 +#ifndef CONFIG_XEN
     3.8  			request_resource(res, code_resource);
     3.9  			request_resource(res, data_resource);
    3.10 +#endif
    3.11  #ifdef CONFIG_KEXEC
    3.12  			request_resource(res, &crashk_res);
    3.13  #endif
    3.14 @@ -1454,11 +1456,8 @@ static void __init register_memory(void)
    3.15  	int	      i;
    3.16  
    3.17  	/* Nothing to do if not running in dom0. */
    3.18 -	if (!is_initial_xendomain()) {
    3.19 -		legacy_init_iomem_resources(e820.map, e820.nr_map,
    3.20 -					    &code_resource, &data_resource);
    3.21 +	if (!is_initial_xendomain())
    3.22  		return;
    3.23 -	}
    3.24  
    3.25  #ifdef CONFIG_XEN
    3.26  	machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
     4.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Fri Sep 01 12:52:12 2006 -0600
     4.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Fri Sep 01 13:04:02 2006 -0600
     4.3 @@ -22,15 +22,6 @@
     4.4  #define ISA_START_ADDRESS	0x0
     4.5  #define ISA_END_ADDRESS		0x100000
     4.6  
     4.7 -#if 0 /* not PAE safe */
     4.8 -/* These hacky macros avoid phys->machine translations. */
     4.9 -#define __direct_pte(x) ((pte_t) { (x) } )
    4.10 -#define __direct_mk_pte(page_nr,pgprot) \
    4.11 -  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
    4.12 -#define direct_mk_pte_phys(physpage, pgprot) \
    4.13 -  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
    4.14 -#endif
    4.15 -
    4.16  static int direct_remap_area_pte_fn(pte_t *pte, 
    4.17  				    struct page *pmd_page,
    4.18  				    unsigned long address, 
    4.19 @@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru
    4.20  
    4.21  	for (i = 0; i < size; i += PAGE_SIZE) {
    4.22  		if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
    4.23 -			/* Fill in the PTE pointers. */
    4.24 +			/* Flush a full batch after filling in the PTE ptrs. */
    4.25  			rc = apply_to_page_range(mm, start_address, 
    4.26  						 address - start_address,
    4.27  						 direct_remap_area_pte_fn, &w);
    4.28  			if (rc)
    4.29  				goto out;
    4.30 -			w = u;
    4.31  			rc = -EFAULT;
    4.32  			if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
    4.33  				goto out;
    4.34 -			v = u;
    4.35 +			v = w = u;
    4.36  			start_address = address;
    4.37  		}
    4.38  
    4.39 @@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru
    4.40  	}
    4.41  
    4.42  	if (v != u) {
    4.43 -		/* get the ptep's filled in */
    4.44 +		/* Final batch. */
    4.45  		rc = apply_to_page_range(mm, start_address,
    4.46  					 address - start_address,
    4.47  					 direct_remap_area_pte_fn, &w);
    4.48 @@ -179,32 +169,6 @@ int touch_pte_range(struct mm_struct *mm
    4.49  
    4.50  EXPORT_SYMBOL(touch_pte_range);
    4.51  
    4.52 -void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
    4.53 -{
    4.54 -	int error;
    4.55 -       
    4.56 -	struct vm_struct *vma;
    4.57 -	vma = get_vm_area (vm_size, VM_IOREMAP);
    4.58 -      
    4.59 -	if (vma == NULL) {
    4.60 -		printk ("ioremap.c,vm_map_xen_pages(): "
    4.61 -			"Failed to get VMA area\n");
    4.62 -		return NULL;
    4.63 -	}
    4.64 -
    4.65 -	error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
    4.66 -					      maddr >> PAGE_SHIFT, vm_size,
    4.67 -					      prot, DOMID_SELF );
    4.68 -	if (error == 0) {
    4.69 -		return vma->addr;
    4.70 -	} else {
    4.71 -		printk ("ioremap.c,vm_map_xen_pages(): "
    4.72 -			"Failed to map xen shared pages into kernel space\n");
    4.73 -		return NULL;
    4.74 -	}
    4.75 -}
    4.76 -EXPORT_SYMBOL(vm_map_xen_pages);
    4.77 -
    4.78  /*
    4.79   * Does @address reside within a non-highmem page that is local to this virtual
    4.80   * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
     5.1 --- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c	Fri Sep 01 12:52:12 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c	Fri Sep 01 13:04:02 2006 -0600
     5.3 @@ -26,6 +26,7 @@
     5.4  #include <xen/evtchn.h>
     5.5  #include "op_counter.h"
     5.6  
     5.7 +#include <xen/driver_util.h>
     5.8  #include <xen/interface/xen.h>
     5.9  #include <xen/interface/xenoprof.h>
    5.10  #include <../../../drivers/oprofile/cpu_buffer.h>
    5.11 @@ -34,8 +35,6 @@
    5.12  static int xenoprof_start(void);
    5.13  static void xenoprof_stop(void);
    5.14  
    5.15 -void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
    5.16 -
    5.17  static int xenoprof_enabled = 0;
    5.18  static unsigned int num_events = 0;
    5.19  static int is_primary = 0;
    5.20 @@ -373,9 +372,9 @@ static int xenoprof_set_passive(int * p_
    5.21  {
    5.22  	int ret;
    5.23  	int i, j;
    5.24 -	int vm_size;
    5.25  	int npages;
    5.26  	struct xenoprof_buf *buf;
    5.27 +	struct vm_struct *area;
    5.28  	pgprot_t prot = __pgprot(_KERNPG_TABLE);
    5.29  
    5.30  	if (!is_primary)
    5.31 @@ -391,20 +390,30 @@ static int xenoprof_set_passive(int * p_
    5.32  	for (i = 0; i < pdoms; i++) {
    5.33  		passive_domains[i].domain_id = p_domains[i];
    5.34  		passive_domains[i].max_samples = 2048;
    5.35 -		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]);
    5.36 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
    5.37 +					     &passive_domains[i]);
    5.38  		if (ret)
    5.39 -			return ret;
    5.40 +			goto out;
    5.41  
    5.42  		npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
    5.43 -		vm_size = npages * PAGE_SIZE;
    5.44  
    5.45 -		p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr,
    5.46 -							      vm_size, prot);
    5.47 -		if (!p_shared_buffer[i]) {
    5.48 +		area = alloc_vm_area(npages * PAGE_SIZE);
    5.49 +		if (area == NULL) {
    5.50  			ret = -ENOMEM;
    5.51  			goto out;
    5.52  		}
    5.53  
    5.54 +		ret = direct_kernel_remap_pfn_range(
    5.55 +			(unsigned long)area->addr,
    5.56 +			passive_domains[i].buf_maddr >> PAGE_SHIFT,
    5.57 +			npages * PAGE_SIZE, prot, DOMID_SELF);
    5.58 +		if (ret) {
    5.59 +			vunmap(area->addr);
    5.60 +			goto out;
    5.61 +		}
    5.62 +
    5.63 +		p_shared_buffer[i] = area->addr;
    5.64 +
    5.65  		for (j = 0; j < passive_domains[i].nbuf; j++) {
    5.66  			buf = (struct xenoprof_buf *)
    5.67  				&p_shared_buffer[i][j * passive_domains[i].bufsize];
    5.68 @@ -473,11 +482,9 @@ static int using_xenoprof;
    5.69  int __init oprofile_arch_init(struct oprofile_operations * ops)
    5.70  {
    5.71  	struct xenoprof_init init;
    5.72 -	struct xenoprof_buf * buf;
    5.73 -	int vm_size;
    5.74 -	int npages;
    5.75 -	int ret;
    5.76 -	int i;
    5.77 +	struct xenoprof_buf *buf;
    5.78 +	int npages, ret, i;
    5.79 +	struct vm_struct *area;
    5.80  
    5.81  	init.max_samples = 16;
    5.82  	ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
    5.83 @@ -495,15 +502,24 @@ int __init oprofile_arch_init(struct opr
    5.84  			num_events = OP_MAX_COUNTER;
    5.85  
    5.86  		npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1;
    5.87 -		vm_size = npages * PAGE_SIZE;
    5.88  
    5.89 -		shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr,
    5.90 -							 vm_size, prot);
    5.91 -		if (!shared_buffer) {
    5.92 +		area = alloc_vm_area(npages * PAGE_SIZE);
    5.93 +		if (area == NULL) {
    5.94  			ret = -ENOMEM;
    5.95  			goto out;
    5.96  		}
    5.97  
    5.98 +		ret = direct_kernel_remap_pfn_range(
    5.99 +			(unsigned long)area->addr,
   5.100 +			init.buf_maddr >> PAGE_SHIFT,
   5.101 +			npages * PAGE_SIZE, prot, DOMID_SELF);
   5.102 +		if (ret) {
   5.103 +			vunmap(area->addr);
   5.104 +			goto out;
   5.105 +		}
   5.106 +
   5.107 +		shared_buffer = area->addr;
   5.108 +
   5.109  		for (i=0; i< nbuf; i++) {
   5.110  			buf = (struct xenoprof_buf*) 
   5.111  				&shared_buffer[i * init.bufsize];
     6.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c	Fri Sep 01 12:52:12 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c	Fri Sep 01 13:04:02 2006 -0600
     6.3 @@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc
     6.4  			 *  so we try it repeatedly and let the resource manager
     6.5  			 *  test it.
     6.6  			 */
     6.7 +#ifndef CONFIG_XEN
     6.8  			request_resource(res, &code_resource);
     6.9  			request_resource(res, &data_resource);
    6.10 +#endif
    6.11  #ifdef CONFIG_KEXEC
    6.12  			request_resource(res, &crashk_res);
    6.13  #endif
     7.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Fri Sep 01 12:52:12 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Fri Sep 01 13:04:02 2006 -0600
     7.3 @@ -846,7 +846,7 @@ void __init setup_arch(char **cmdline_p)
     7.4  
     7.5  		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
     7.6  			/* Make sure we have a large enough P->M table. */
     7.7 -			phys_to_machine_mapping = alloc_bootmem(
     7.8 +			phys_to_machine_mapping = alloc_bootmem_pages(
     7.9  				end_pfn * sizeof(unsigned long));
    7.10  			memset(phys_to_machine_mapping, ~0,
    7.11  			       end_pfn * sizeof(unsigned long));
    7.12 @@ -863,7 +863,7 @@ void __init setup_arch(char **cmdline_p)
    7.13  			 * list of frames that make up the p2m table. Used by
    7.14                           * save/restore.
    7.15  			 */
    7.16 -			pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
    7.17 +			pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
    7.18  			HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
    7.19  				virt_to_mfn(pfn_to_mfn_frame_list_list);
    7.20  
    7.21 @@ -873,7 +873,7 @@ void __init setup_arch(char **cmdline_p)
    7.22  					k++;
    7.23  					BUG_ON(k>=fpp);
    7.24  					pfn_to_mfn_frame_list[k] =
    7.25 -						alloc_bootmem(PAGE_SIZE);
    7.26 +						alloc_bootmem_pages(PAGE_SIZE);
    7.27  					pfn_to_mfn_frame_list_list[k] =
    7.28  						virt_to_mfn(pfn_to_mfn_frame_list[k]);
    7.29  					j=0;
    7.30 @@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p)
    7.31  		BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
    7.32  
    7.33  		e820_reserve_resources(machine_e820, memmap.nr_entries);
    7.34 -	} else
    7.35 +	}
    7.36 +#else
    7.37 +	e820_reserve_resources(e820.map, e820.nr_map);
    7.38  #endif
    7.39 -	e820_reserve_resources(e820.map, e820.nr_map);
    7.40  
    7.41  	request_resource(&iomem_resource, &video_ram_resource);
    7.42  
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri Sep 01 12:52:12 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri Sep 01 13:04:02 2006 -0600
     8.3 @@ -301,11 +301,11 @@ static void frontend_changed(struct xenb
     8.4  	struct backend_info *be = dev->dev.driver_data;
     8.5  	int err;
     8.6  
     8.7 -	DPRINTK("");
     8.8 +	DPRINTK("%s", xenbus_strstate(frontend_state));
     8.9  
    8.10  	switch (frontend_state) {
    8.11  	case XenbusStateInitialising:
    8.12 -		if (dev->state == XenbusStateClosing) {
    8.13 +		if (dev->state == XenbusStateClosed) {
    8.14  			printk("%s: %s: prepare for reconnect\n",
    8.15  			       __FUNCTION__, dev->nodename);
    8.16  			xenbus_switch_state(dev, XenbusStateInitWait);
    8.17 @@ -331,8 +331,12 @@ static void frontend_changed(struct xenb
    8.18  		xenbus_switch_state(dev, XenbusStateClosing);
    8.19  		break;
    8.20  
    8.21 +	case XenbusStateClosed:
    8.22 +		xenbus_switch_state(dev, XenbusStateClosed);
    8.23 +		if (xenbus_dev_is_online(dev))
    8.24 +			break;
    8.25 +		/* fall through if not online */
    8.26  	case XenbusStateUnknown:
    8.27 -	case XenbusStateClosed:
    8.28  		device_unregister(&dev->dev);
    8.29  		break;
    8.30  
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Sep 01 12:52:12 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Sep 01 13:04:02 2006 -0600
     9.3 @@ -273,7 +273,7 @@ static void backend_changed(struct xenbu
     9.4  			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
     9.5  
     9.6  		down(&bd->bd_sem);
     9.7 -		if (info->users > 0)
     9.8 +		if (info->users > 0 && system_state == SYSTEM_RUNNING)
     9.9  			xenbus_dev_error(dev, -EBUSY,
    9.10  					 "Device in use; refusing to close");
    9.11  		else
    9.12 @@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb
    9.13  
    9.14  	xlvbd_del(info);
    9.15  
    9.16 -	xenbus_switch_state(dev, XenbusStateClosed);
    9.17 +	xenbus_frontend_closed(dev);
    9.18  }
    9.19  
    9.20  
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Sep 01 12:52:12 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Sep 01 13:04:02 2006 -0600
    10.3 @@ -114,8 +114,8 @@ typedef struct domid_translate {
    10.4  } domid_translate_t ;
    10.5  
    10.6  
    10.7 -domid_translate_t  translate_domid[MAX_TAP_DEV];
    10.8 -tap_blkif_t *tapfds[MAX_TAP_DEV];
    10.9 +static domid_translate_t  translate_domid[MAX_TAP_DEV];
   10.10 +static tap_blkif_t *tapfds[MAX_TAP_DEV];
   10.11  
   10.12  static int __init set_blkif_reqs(char *str)
   10.13  {
   10.14 @@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
   10.15  			       "ring does not exist!\n");
   10.16  			print_dbug = 0; /*We only print this message once*/
   10.17  		}
   10.18 -		return 1;
   10.19 +		return 0;
   10.20  	}
   10.21  
   10.22  	info = tapfds[blkif->dev_num];
   10.23 @@ -1127,7 +1127,7 @@ static int do_block_io_op(blkif_t *blkif
   10.24  			WPRINTK("Can't get UE info!\n");
   10.25  			print_dbug = 0;
   10.26  		}
   10.27 -		return 1;
   10.28 +		return 0;
   10.29  	}
   10.30  
   10.31  	while (rc != rp) {
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Fri Sep 01 12:52:12 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Fri Sep 01 13:04:02 2006 -0600
    11.3 @@ -91,6 +91,7 @@ blkif_t *tap_alloc_blkif(domid_t domid);
    11.4  void tap_blkif_free(blkif_t *blkif);
    11.5  int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
    11.6  		  unsigned int evtchn);
    11.7 +void tap_blkif_unmap(blkif_t *blkif);
    11.8  
    11.9  #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
   11.10  #define blkif_put(_b)					\
    12.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c	Fri Sep 01 12:52:12 2006 -0600
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c	Fri Sep 01 13:04:02 2006 -0600
    12.3 @@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign
    12.4  	return 0;
    12.5  }
    12.6  
    12.7 +void tap_blkif_unmap(blkif_t *blkif)
    12.8 +{
    12.9 +	if (blkif->irq) {
   12.10 +		unbind_from_irqhandler(blkif->irq, blkif);
   12.11 +		blkif->irq = 0;
   12.12 +	}
   12.13 +	if (blkif->blk_ring.sring) {
   12.14 +		unmap_frontend_page(blkif);
   12.15 +		free_vm_area(blkif->blk_ring_area);
   12.16 +		blkif->blk_ring.sring = NULL;
   12.17 +	}
   12.18 +}
   12.19 +
   12.20  void tap_blkif_free(blkif_t *blkif)
   12.21  {
   12.22  	atomic_dec(&blkif->refcnt);
   12.23  	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
   12.24  
   12.25 -	/* Already disconnected? */
   12.26 -	if (blkif->irq)
   12.27 -		unbind_from_irqhandler(blkif->irq, blkif);
   12.28 -
   12.29 -	if (blkif->blk_ring.sring) {
   12.30 -		unmap_frontend_page(blkif);
   12.31 -		free_vm_area(blkif->blk_ring_area);
   12.32 -	}
   12.33 -
   12.34 +	tap_blkif_unmap(blkif);
   12.35  	kmem_cache_free(blkif_cachep, blkif);
   12.36  }
   12.37  
    13.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Fri Sep 01 12:52:12 2006 -0600
    13.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Fri Sep 01 13:04:02 2006 -0600
    13.3 @@ -247,6 +247,11 @@ static void tap_frontend_changed(struct 
    13.4  
    13.5  	switch (frontend_state) {
    13.6  	case XenbusStateInitialising:
    13.7 +		if (dev->state == XenbusStateClosed) {
    13.8 +			printk("%s: %s: prepare for reconnect\n",
    13.9 +			       __FUNCTION__, dev->nodename);
   13.10 +			xenbus_switch_state(dev, XenbusStateInitWait);
   13.11 +		}
   13.12  		break;
   13.13  
   13.14  	case XenbusStateInitialised:
   13.15 @@ -264,11 +269,20 @@ static void tap_frontend_changed(struct 
   13.16  		break;
   13.17  
   13.18  	case XenbusStateClosing:
   13.19 +		if (be->blkif->xenblkd) {
   13.20 +			kthread_stop(be->blkif->xenblkd);
   13.21 +			be->blkif->xenblkd = NULL;
   13.22 +		}
   13.23 +		tap_blkif_unmap(be->blkif);
   13.24  		xenbus_switch_state(dev, XenbusStateClosing);
   13.25  		break;
   13.26  
   13.27 +	case XenbusStateClosed:
   13.28 +		xenbus_switch_state(dev, XenbusStateClosed);
   13.29 +		if (xenbus_dev_is_online(dev))
   13.30 +			break;
   13.31 +		/* fall through if not online */
   13.32  	case XenbusStateUnknown:
   13.33 -	case XenbusStateClosed:
   13.34  		device_unregister(&dev->dev);
   13.35  		break;
   13.36  
    14.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri Sep 01 12:52:12 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri Sep 01 13:04:02 2006 -0600
    14.3 @@ -228,13 +228,13 @@ static void frontend_changed(struct xenb
    14.4  {
    14.5  	struct backend_info *be = dev->dev.driver_data;
    14.6  
    14.7 -	DPRINTK("");
    14.8 +	DPRINTK("%s", xenbus_strstate(frontend_state));
    14.9  
   14.10  	be->frontend_state = frontend_state;
   14.11  
   14.12  	switch (frontend_state) {
   14.13  	case XenbusStateInitialising:
   14.14 -		if (dev->state == XenbusStateClosing) {
   14.15 +		if (dev->state == XenbusStateClosed) {
   14.16  			printk("%s: %s: prepare for reconnect\n",
   14.17  			       __FUNCTION__, dev->nodename);
   14.18  			if (be->netif) {
   14.19 @@ -260,8 +260,12 @@ static void frontend_changed(struct xenb
   14.20  		xenbus_switch_state(dev, XenbusStateClosing);
   14.21  		break;
   14.22  
   14.23 +	case XenbusStateClosed:
   14.24 +		xenbus_switch_state(dev, XenbusStateClosed);
   14.25 +		if (xenbus_dev_is_online(dev))
   14.26 +			break;
   14.27 +		/* fall through if not online */
   14.28  	case XenbusStateUnknown:
   14.29 -	case XenbusStateClosed:
   14.30  		if (be->netif != NULL)
   14.31  			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
   14.32  		device_unregister(&dev->dev);
    15.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Sep 01 12:52:12 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Sep 01 13:04:02 2006 -0600
    15.3 @@ -193,6 +193,7 @@ static void netfront_closing(struct xenb
    15.4  
    15.5  static void end_access(int, void *);
    15.6  static void netif_disconnect_backend(struct netfront_info *);
    15.7 +static int open_netdev(struct netfront_info *);
    15.8  static void close_netdev(struct netfront_info *);
    15.9  static void netif_free(struct netfront_info *);
   15.10  
   15.11 @@ -263,15 +264,22 @@ static int __devinit netfront_probe(stru
   15.12  	dev->dev.driver_data = info;
   15.13  
   15.14  	err = talk_to_backend(dev, info);
   15.15 -	if (err) {
   15.16 -		xennet_sysfs_delif(info->netdev);
   15.17 -		unregister_netdev(netdev);
   15.18 -		free_netdev(netdev);
   15.19 -		dev->dev.driver_data = NULL;
   15.20 -		return err;
   15.21 -	}
   15.22 +	if (err)
   15.23 +		goto fail_backend;
   15.24 +
   15.25 +	err = open_netdev(info);
   15.26 +	if (err)
   15.27 +		goto fail_open;
   15.28  
   15.29  	return 0;
   15.30 +
   15.31 + fail_open:
   15.32 +	xennet_sysfs_delif(info->netdev);
   15.33 +	unregister_netdev(netdev);
   15.34 + fail_backend:
   15.35 +	free_netdev(netdev);
   15.36 +	dev->dev.driver_data = NULL;
   15.37 +	return err;
   15.38  }
   15.39  
   15.40  
   15.41 @@ -478,7 +486,7 @@ static void backend_changed(struct xenbu
   15.42  	struct netfront_info *np = dev->dev.driver_data;
   15.43  	struct net_device *netdev = np->netdev;
   15.44  
   15.45 -	DPRINTK("\n");
   15.46 +	DPRINTK("%s\n", xenbus_strstate(backend_state));
   15.47  
   15.48  	switch (backend_state) {
   15.49  	case XenbusStateInitialising:
   15.50 @@ -1887,27 +1895,9 @@ create_netdev(int handle, int copying_re
   15.51  	SET_MODULE_OWNER(netdev);
   15.52  	SET_NETDEV_DEV(netdev, &dev->dev);
   15.53  
   15.54 -	err = register_netdev(netdev);
   15.55 -	if (err) {
   15.56 -		printk(KERN_WARNING "%s> register_netdev err=%d\n",
   15.57 -		       __FUNCTION__, err);
   15.58 -		goto exit_free_rx;
   15.59 -	}
   15.60 -
   15.61 -	err = xennet_sysfs_addif(netdev);
   15.62 -	if (err) {
   15.63 -		/* This can be non-fatal: it only means no tuning parameters */
   15.64 -		printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
   15.65 -		       __FUNCTION__, err);
   15.66 -	}
   15.67 -
   15.68  	np->netdev = netdev;
   15.69 -
   15.70  	return netdev;
   15.71  
   15.72 -
   15.73 - exit_free_rx:
   15.74 -	gnttab_free_grant_references(np->gref_rx_head);
   15.75   exit_free_tx:
   15.76  	gnttab_free_grant_references(np->gref_tx_head);
   15.77   exit:
   15.78 @@ -1946,11 +1936,10 @@ static void netfront_closing(struct xenb
   15.79  {
   15.80  	struct netfront_info *info = dev->dev.driver_data;
   15.81  
   15.82 -	DPRINTK("netfront_closing: %s removed\n", dev->nodename);
   15.83 +	DPRINTK("%s\n", dev->nodename);
   15.84  
   15.85  	close_netdev(info);
   15.86 -
   15.87 -	xenbus_switch_state(dev, XenbusStateClosed);
   15.88 +	xenbus_frontend_closed(dev);
   15.89  }
   15.90  
   15.91  
   15.92 @@ -1967,6 +1956,26 @@ static int __devexit netfront_remove(str
   15.93  }
   15.94  
   15.95  
   15.96 +static int open_netdev(struct netfront_info *info)
   15.97 +{
   15.98 +	int err;
   15.99 +	
  15.100 +	err = register_netdev(info->netdev);
  15.101 +	if (err) {
  15.102 +		printk(KERN_WARNING "%s: register_netdev err=%d\n",
  15.103 +		       __FUNCTION__, err);
  15.104 +		return err;
  15.105 +	}
  15.106 +
  15.107 +	err = xennet_sysfs_addif(info->netdev);
  15.108 +	if (err) {
  15.109 +		/* This can be non-fatal: it only means no tuning parameters */
  15.110 +		printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
  15.111 +		       __FUNCTION__, err);
  15.112 +	}
  15.113 +	return 0;
  15.114 +}
  15.115 +
  15.116  static void close_netdev(struct netfront_info *info)
  15.117  {
  15.118  	del_timer_sync(&info->rx_refill_timer);
    16.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c	Fri Sep 01 12:52:12 2006 -0600
    16.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c	Fri Sep 01 13:04:02 2006 -0600
    16.3 @@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
    16.4  }
    16.5  EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
    16.6  
    16.7 +int xenbus_dev_is_online(struct xenbus_device *dev)
    16.8 +{
    16.9 +	int rc, val;
   16.10 +
   16.11 +	rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
   16.12 +	if (rc != 1)
   16.13 +		val = 0; /* no online node present */
   16.14 +
   16.15 +	return val;
   16.16 +}
   16.17 +EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
   16.18 +
   16.19  MODULE_LICENSE("Dual BSD/GPL");
    17.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri Sep 01 12:52:12 2006 -0600
    17.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri Sep 01 13:04:02 2006 -0600
    17.3 @@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt, 
    17.4  #define DPRINTK(fmt, args...) \
    17.5      pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
    17.6  
    17.7 +char *xenbus_strstate(enum xenbus_state state)
    17.8 +{
    17.9 +	static char *name[] = {
   17.10 +		[ XenbusStateUnknown      ] = "Unknown",
   17.11 +		[ XenbusStateInitialising ] = "Initialising",
   17.12 +		[ XenbusStateInitWait     ] = "InitWait",
   17.13 +		[ XenbusStateInitialised  ] = "Initialised",
   17.14 +		[ XenbusStateConnected    ] = "Connected",
   17.15 +		[ XenbusStateClosing      ] = "Closing",
   17.16 +		[ XenbusStateClosed	  ] = "Closed",
   17.17 +	};
   17.18 +	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
   17.19 +}
   17.20 +
   17.21  int xenbus_watch_path(struct xenbus_device *dev, const char *path,
   17.22  		      struct xenbus_watch *watch,
   17.23  		      void (*callback)(struct xenbus_watch *,
   17.24 @@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de
   17.25  }
   17.26  EXPORT_SYMBOL_GPL(xenbus_switch_state);
   17.27  
   17.28 +int xenbus_frontend_closed(struct xenbus_device *dev)
   17.29 +{
   17.30 +	xenbus_switch_state(dev, XenbusStateClosed);
   17.31 +	complete(&dev->down);
   17.32 +	return 0;
   17.33 +}
   17.34 +EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
   17.35  
   17.36  /**
   17.37   * Return the path to the error node for the given device, or NULL on failure.
    18.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Sep 01 12:52:12 2006 -0600
    18.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Sep 01 13:04:02 2006 -0600
    18.3 @@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch
    18.4  
    18.5  static int xenbus_dev_probe(struct device *_dev);
    18.6  static int xenbus_dev_remove(struct device *_dev);
    18.7 +static void xenbus_dev_shutdown(struct device *_dev);
    18.8  
    18.9  /* If something in array of ids matches this device, return it. */
   18.10  static const struct xenbus_device_id *
   18.11 @@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte
   18.12  		.match    = xenbus_match,
   18.13  		.probe    = xenbus_dev_probe,
   18.14  		.remove   = xenbus_dev_remove,
   18.15 +		.shutdown = xenbus_dev_shutdown,
   18.16  	},
   18.17  	.dev = {
   18.18  		.bus_id = "xen",
   18.19 @@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen
   18.20  		.match    = xenbus_match,
   18.21  		.probe    = xenbus_dev_probe,
   18.22  		.remove   = xenbus_dev_remove,
   18.23 +//		.shutdown = xenbus_dev_shutdown,
   18.24  		.uevent   = xenbus_uevent_backend,
   18.25  	},
   18.26  	.dev = {
   18.27 @@ -316,8 +319,9 @@ static void otherend_changed(struct xenb
   18.28  
   18.29  	state = xenbus_read_driver_state(dev->otherend);
   18.30  
   18.31 -	DPRINTK("state is %d, %s, %s",
   18.32 -		state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
   18.33 +	DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
   18.34 +		dev->otherend_watch.node, vec[XS_WATCH_PATH]);
   18.35 +
   18.36  	if (drv->otherend_changed)
   18.37  		drv->otherend_changed(dev, state);
   18.38  }
   18.39 @@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic
   18.40  	const struct xenbus_device_id *id;
   18.41  	int err;
   18.42  
   18.43 -	DPRINTK("");
   18.44 +	DPRINTK("%s", dev->nodename);
   18.45  
   18.46  	if (!drv->probe) {
   18.47  		err = -ENODEV;
   18.48 @@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi
   18.49  	struct xenbus_device *dev = to_xenbus_device(_dev);
   18.50  	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
   18.51  
   18.52 -	DPRINTK("");
   18.53 +	DPRINTK("%s", dev->nodename);
   18.54  
   18.55  	free_otherend_watch(dev);
   18.56  	free_otherend_details(dev);
   18.57 @@ -405,6 +409,27 @@ static int xenbus_dev_remove(struct devi
   18.58  	return 0;
   18.59  }
   18.60  
   18.61 +static void xenbus_dev_shutdown(struct device *_dev)
   18.62 +{
   18.63 +	struct xenbus_device *dev = to_xenbus_device(_dev);
   18.64 +	unsigned long timeout = 5*HZ;
   18.65 +
   18.66 +	DPRINTK("%s", dev->nodename);
   18.67 +
   18.68 +	get_device(&dev->dev);
   18.69 +	if (dev->state != XenbusStateConnected) {
   18.70 +		printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
   18.71 +		       dev->nodename, xenbus_strstate(dev->state));
   18.72 +		goto out;
   18.73 +	}
   18.74 +	xenbus_switch_state(dev, XenbusStateClosing);
   18.75 +	timeout = wait_for_completion_timeout(&dev->down, timeout);
   18.76 +	if (!timeout)
   18.77 +		printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
   18.78 + out:
   18.79 +	put_device(&dev->dev);
   18.80 +}
   18.81 +
   18.82  static int xenbus_register_driver_common(struct xenbus_driver *drv,
   18.83  					 struct xen_bus_type *bus)
   18.84  {
   18.85 @@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_
   18.86  	tmpstring += strlen(tmpstring) + 1;
   18.87  	strcpy(tmpstring, type);
   18.88  	xendev->devicetype = tmpstring;
   18.89 +	init_completion(&xendev->down);
   18.90  
   18.91  	xendev->dev.parent = &bus->dev;
   18.92  	xendev->dev.bus = &bus->bus;
    19.1 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h	Fri Sep 01 12:52:12 2006 -0600
    19.2 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h	Fri Sep 01 13:04:02 2006 -0600
    19.3 @@ -37,6 +37,7 @@
    19.4  #include <linux/device.h>
    19.5  #include <linux/notifier.h>
    19.6  #include <linux/mutex.h>
    19.7 +#include <linux/completion.h>
    19.8  #include <xen/interface/xen.h>
    19.9  #include <xen/interface/grant_table.h>
   19.10  #include <xen/interface/io/xenbus.h>
   19.11 @@ -74,6 +75,7 @@ struct xenbus_device {
   19.12  	struct xenbus_watch otherend_watch;
   19.13  	struct device dev;
   19.14  	enum xenbus_state state;
   19.15 +	struct completion down;
   19.16  };
   19.17  
   19.18  static inline struct xenbus_device *to_xenbus_device(struct device *dev)
   19.19 @@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi
   19.20  
   19.21  int __init xenbus_dev_init(void);
   19.22  
   19.23 +char *xenbus_strstate(enum xenbus_state state);
   19.24 +int xenbus_dev_is_online(struct xenbus_device *dev);
   19.25 +int xenbus_frontend_closed(struct xenbus_device *dev);
   19.26 +
   19.27  #endif /* _XEN_XENBUS_H */
    20.1 --- a/tools/blktap/drivers/block-aio.c	Fri Sep 01 12:52:12 2006 -0600
    20.2 +++ b/tools/blktap/drivers/block-aio.c	Fri Sep 01 13:04:02 2006 -0600
    20.3 @@ -52,7 +52,7 @@
    20.4   */
    20.5  #define REQUEST_ASYNC_FD 1
    20.6  
    20.7 -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
    20.8 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
    20.9  
   20.10  struct pending_aio {
   20.11  	td_callback_t cb;
   20.12 @@ -146,7 +146,7 @@ int tdaio_open (struct td_state *s, cons
   20.13  	struct tdaio_state *prv = (struct tdaio_state *)s->private;
   20.14  	s->private = prv;
   20.15  
   20.16 -	DPRINTF("XXX: block-aio open('%s')", name);
   20.17 +	DPRINTF("block-aio open('%s')", name);
   20.18  	/* Initialize AIO */
   20.19  	prv->iocb_free_count = MAX_AIO_REQS;
   20.20  	prv->iocb_queued     = 0;
   20.21 @@ -156,9 +156,18 @@ int tdaio_open (struct td_state *s, cons
   20.22  
   20.23  	if (prv->poll_fd < 0) {
   20.24  		ret = prv->poll_fd;
   20.25 -		DPRINTF("Couldn't get fd for AIO poll support.  This is "
   20.26 -			"probably because your kernel does not have the "
   20.27 -			"aio-poll patch applied.\n");
   20.28 +                if (ret == -EAGAIN) {
   20.29 +                        DPRINTF("Couldn't setup AIO context.  If you are "
   20.30 +                                "trying to concurrently use a large number "
   20.31 +                                "of blktap-based disks, you may need to "
   20.32 +                                "increase the system-wide aio request limit. "
   20.33 +                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
   20.34 +                                "aio-max-nr')\n");
   20.35 +                } else {
   20.36 +                        DPRINTF("Couldn't get fd for AIO poll support.  This "
   20.37 +                                "is probably because your kernel does not "
   20.38 +                                "have the aio-poll patch applied.\n");
   20.39 +                }
   20.40  		goto done;
   20.41  	}
   20.42  
    21.1 --- a/tools/blktap/drivers/block-qcow.c	Fri Sep 01 12:52:12 2006 -0600
    21.2 +++ b/tools/blktap/drivers/block-qcow.c	Fri Sep 01 13:04:02 2006 -0600
    21.3 @@ -51,7 +51,7 @@
    21.4  /******AIO DEFINES******/
    21.5  #define REQUEST_ASYNC_FD 1
    21.6  #define MAX_QCOW_IDS  0xFFFF
    21.7 -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
    21.8 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
    21.9  
   21.10  struct pending_aio {
   21.11          td_callback_t cb;
   21.12 @@ -176,10 +176,21 @@ static int init_aio_state(struct td_stat
   21.13          s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
   21.14          s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
   21.15  
   21.16 -        if (s->poll_fd < 0) {
   21.17 -                DPRINTF("Retrieving Async poll fd failed\n");
   21.18 +	if (s->poll_fd < 0) {
   21.19 +                if (s->poll_fd == -EAGAIN) {
   21.20 +                        DPRINTF("Couldn't setup AIO context.  If you are "
   21.21 +                                "trying to concurrently use a large number "
   21.22 +                                "of blktap-based disks, you may need to "
   21.23 +                                "increase the system-wide aio request limit. "
   21.24 +                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
   21.25 +                                "aio-max-nr')\n");
   21.26 +                } else {
   21.27 +                        DPRINTF("Couldn't get fd for AIO poll support.  This "
   21.28 +                                "is probably because your kernel does not "
   21.29 +                                "have the aio-poll patch applied.\n");
   21.30 +                }
   21.31  		goto fail;
   21.32 -        }
   21.33 +	}
   21.34  
   21.35          for (i=0;i<MAX_AIO_REQS;i++)
   21.36                  s->iocb_free[i] = &s->iocb_list[i];
    22.1 --- a/tools/blktap/drivers/tapdisk.c	Fri Sep 01 12:52:12 2006 -0600
    22.2 +++ b/tools/blktap/drivers/tapdisk.c	Fri Sep 01 13:04:02 2006 -0600
    22.3 @@ -110,6 +110,7 @@ static void unmap_disk(struct td_state *
    22.4  	free(s->fd_entry);
    22.5  	free(s->blkif);
    22.6  	free(s->ring_info);
    22.7 +        free(s->private);
    22.8  	free(s);
    22.9  
   22.10  	return;
    23.1 --- a/tools/blktap/lib/xs_api.c	Fri Sep 01 12:52:12 2006 -0600
    23.2 +++ b/tools/blktap/lib/xs_api.c	Fri Sep 01 13:04:02 2006 -0600
    23.3 @@ -204,7 +204,7 @@ char *get_dom_domid(struct xs_handle *h,
    23.4  int convert_dev_name_to_num(char *name) {
    23.5  	char *p_sd, *p_hd, *p_xvd, *p_plx, *p, *alpha,*ptr;
    23.6  	int majors[10] = {3,22,33,34,56,57,88,89,90,91};
    23.7 -	int maj,i;
    23.8 +	int maj,i,ret = 0;
    23.9  
   23.10  	asprintf(&p_sd,"/dev/sd");
   23.11  	asprintf(&p_hd,"/dev/hd");
   23.12 @@ -221,7 +221,7 @@ int convert_dev_name_to_num(char *name) 
   23.13  			*ptr++;
   23.14  		}
   23.15  		*p++;
   23.16 -		return BASE_DEV_VAL + (16*i) + atoi(p);
   23.17 +		ret = BASE_DEV_VAL + (16*i) + atoi(p);
   23.18  	} else if (strstr(name, p_hd) != NULL) {
   23.19  		p = name + strlen(p_hd);
   23.20  		for (i = 0, ptr = alpha; i < strlen(alpha); i++) {
   23.21 @@ -229,7 +229,7 @@ int convert_dev_name_to_num(char *name) 
   23.22  			*ptr++;
   23.23  		}
   23.24  		*p++;
   23.25 -		return (majors[i/2]*256) + atoi(p);
   23.26 +		ret = (majors[i/2]*256) + atoi(p);
   23.27  
   23.28  	} else if (strstr(name, p_xvd) != NULL) {
   23.29  		p = name + strlen(p_xvd);
   23.30 @@ -238,17 +238,24 @@ int convert_dev_name_to_num(char *name) 
   23.31  			*ptr++;
   23.32  		}
   23.33  		*p++;
   23.34 -		return (202*256) + (16*i) + atoi(p);
   23.35 +		ret = (202*256) + (16*i) + atoi(p);
   23.36  
   23.37  	} else if (strstr(name, p_plx) != NULL) {
   23.38  		p = name + strlen(p_plx);
   23.39 -		return atoi(p);
   23.40 +		ret = atoi(p);
   23.41  
   23.42  	} else {
   23.43  		DPRINTF("Unknown device type, setting to default.\n");
   23.44 -		return BASE_DEV_VAL;
   23.45 +		ret = BASE_DEV_VAL;
   23.46  	}
   23.47 -	return 0;
   23.48 +
   23.49 +        free(p_sd);
   23.50 +        free(p_hd);
   23.51 +        free(p_xvd);
   23.52 +        free(p_plx);
   23.53 +        free(alpha);
   23.54 +        
   23.55 +	return ret;
   23.56  }
   23.57  
   23.58  /**
    24.1 --- a/tools/libxc/ia64/xc_ia64_stubs.c	Fri Sep 01 12:52:12 2006 -0600
    24.2 +++ b/tools/libxc/ia64/xc_ia64_stubs.c	Fri Sep 01 13:04:02 2006 -0600
    24.3 @@ -36,7 +36,6 @@ xc_ia64_get_pfn_list(int xc_handle, uint
    24.4      struct xen_domctl domctl;
    24.5      int num_pfns,ret;
    24.6      unsigned int __start_page, __nr_pages;
    24.7 -    unsigned long max_pfns;
    24.8      xen_pfn_t *__pfn_buf;
    24.9  
   24.10      __start_page = start_page;
   24.11 @@ -44,27 +43,22 @@ xc_ia64_get_pfn_list(int xc_handle, uint
   24.12      __pfn_buf = pfn_buf;
   24.13    
   24.14      while (__nr_pages) {
   24.15 -        max_pfns = ((unsigned long)__start_page << 32) | __nr_pages;
   24.16          domctl.cmd = XEN_DOMCTL_getmemlist;
   24.17 -        domctl.domain   = (domid_t)domid;
   24.18 -        domctl.u.getmemlist.max_pfns = max_pfns;
   24.19 +        domctl.domain = (domid_t)domid;
   24.20 +        domctl.u.getmemlist.max_pfns = __nr_pages;
   24.21 +        domctl.u.getmemlist.start_pfn =__start_page;
   24.22          domctl.u.getmemlist.num_pfns = 0;
   24.23          set_xen_guest_handle(domctl.u.getmemlist.buffer, __pfn_buf);
   24.24  
   24.25 -        if ((max_pfns != -1UL)
   24.26 -            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
   24.27 +        if (mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
   24.28              PERROR("Could not lock pfn list buffer");
   24.29              return -1;
   24.30          }
   24.31  
   24.32          ret = do_domctl(xc_handle, &domctl);
   24.33  
   24.34 -        if (max_pfns != -1UL)
   24.35 -            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
   24.36 +        (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
   24.37  
   24.38 -        if (max_pfns == -1UL)
   24.39 -            return 0;
   24.40 -        
   24.41          num_pfns = domctl.u.getmemlist.num_pfns;
   24.42          __start_page += num_pfns;
   24.43          __nr_pages -= num_pfns;
    25.1 --- a/tools/libxc/powerpc64/xc_linux_build.c	Fri Sep 01 12:52:12 2006 -0600
    25.2 +++ b/tools/libxc/powerpc64/xc_linux_build.c	Fri Sep 01 13:04:02 2006 -0600
    25.3 @@ -309,7 +309,7 @@ static unsigned long create_start_info(s
    25.4      si->store_evtchn = store_evtchn;
    25.5      si->console.domU.mfn = si->nr_pages - 3;
    25.6      si->console.domU.evtchn = console_evtchn;
    25.7 -    si_addr = eomem - (PAGE_SIZE * 4);
    25.8 +    si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
    25.9  
   25.10      return si_addr;
   25.11  }
   25.12 @@ -388,7 +388,7 @@ int xc_linux_build(int xc_handle,
   25.13      }
   25.14  
   25.15      si_addr = create_start_info(&si, console_evtchn, store_evtchn);
   25.16 -    *console_mfn = page_array[si.console_mfn];
   25.17 +    *console_mfn = page_array[si.console.domU.mfn];
   25.18      *store_mfn = page_array[si.store_mfn];
   25.19      
   25.20      if (install_image(xc_handle, domid, page_array, &si, si_addr,
    26.1 --- a/tools/libxc/xenctrl.h	Fri Sep 01 12:52:12 2006 -0600
    26.2 +++ b/tools/libxc/xenctrl.h	Fri Sep 01 13:04:02 2006 -0600
    26.3 @@ -9,6 +9,11 @@
    26.4  #ifndef XENCTRL_H
    26.5  #define XENCTRL_H
    26.6  
    26.7 +/* Tell the Xen public headers we are a user-space tools build. */
    26.8 +#ifndef __XEN_TOOLS__
    26.9 +#define __XEN_TOOLS__ 1
   26.10 +#endif
   26.11 +
   26.12  #include <stddef.h>
   26.13  #include <stdint.h>
   26.14  #include <sys/ptrace.h>
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/python/xen/xend/FlatDeviceTree.py	Fri Sep 01 13:04:02 2006 -0600
    27.3 @@ -0,0 +1,323 @@
    27.4 +#!/usr/bin/env python
    27.5 +#
    27.6 +# This library is free software; you can redistribute it and/or
    27.7 +# modify it under the terms of version 2.1 of the GNU Lesser General Public
    27.8 +# License as published by the Free Software Foundation.
    27.9 +#
   27.10 +# This library is distributed in the hope that it will be useful,
   27.11 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
   27.12 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   27.13 +# Lesser General Public License for more details.
   27.14 +#
   27.15 +# You should have received a copy of the GNU Lesser General Public
   27.16 +# License along with this library; if not, write to the Free Software
   27.17 +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   27.18 +#
   27.19 +# Copyright (C) IBM Corp. 2006
   27.20 +#
   27.21 +# Authors: Hollis Blanchard <hollisb@us.ibm.com>
   27.22 +
   27.23 +import os
   27.24 +import sys
   27.25 +import struct
   27.26 +import stat
   27.27 +import re
   27.28 +
   27.29 +_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
   27.30 +_OF_DT_BEGIN_NODE = 0x1
   27.31 +_OF_DT_END_NODE = 0x2
   27.32 +_OF_DT_PROP = 0x3
   27.33 +_OF_DT_END = 0x9
   27.34 +
   27.35 +def _bincat(seq, separator=''):
   27.36 +    '''Concatenate the contents of seq into a bytestream.'''
   27.37 +    strs = []
   27.38 +    for item in seq:
   27.39 +        if type(item) == type(0):
   27.40 +            strs.append(struct.pack(">I", item))
   27.41 +        else:
   27.42 +            try:
   27.43 +                strs.append(item.to_bin())
   27.44 +            except AttributeError, e:
   27.45 +                strs.append(item)
   27.46 +    return separator.join(strs)
   27.47 +
   27.48 +def _alignup(val, alignment):
   27.49 +    return (val + alignment - 1) & ~(alignment - 1)
   27.50 +
   27.51 +def _pad(buf, alignment):
   27.52 +    '''Pad bytestream with NULLs to specified alignment.'''
   27.53 +    padlen = _alignup(len(buf), alignment)
   27.54 +    return buf + '\0' * (padlen - len(buf))
   27.55 +    # not present in Python 2.3:
   27.56 +    #return buf.ljust(_padlen, '\0')
   27.57 +
   27.58 +def _indent(item):
   27.59 +    indented = []
   27.60 +    for line in str(item).splitlines(True):
   27.61 +        indented.append('    ' + line)
   27.62 +    return ''.join(indented)
   27.63 +
   27.64 +class _Property:
   27.65 +    _nonprint = re.compile('[\000-\037\200-\377]')
   27.66 +    def __init__(self, node, name, value):
   27.67 +        self.node = node
   27.68 +        self.value = value
   27.69 +        self.name = name
   27.70 +        self.node.tree.stradd(name)
   27.71 +
   27.72 +    def __str__(self):
   27.73 +        result = self.name
   27.74 +        if self.value:
   27.75 +            searchtext = self.value
   27.76 +            # it's ok for a string to end in NULL
   27.77 +            if searchtext.find('\000') == len(searchtext)-1:
   27.78 +                searchtext = searchtext[:-1]
   27.79 +            m = self._nonprint.search(searchtext)
   27.80 +            if m:
   27.81 +                bytes = struct.unpack("B" * len(self.value), self.value)
   27.82 +                hexbytes = [ '%02x' % b for b in bytes ]
   27.83 +                words = []
   27.84 +                for i in range(0, len(self.value), 4):
   27.85 +                    words.append(''.join(hexbytes[i:i+4]))
   27.86 +                v = '<' + ' '.join(words) + '>'
   27.87 +            else:
   27.88 +                v = '"%s"' % self.value
   27.89 +            result += ': ' + v
   27.90 +        return result
   27.91 +
   27.92 +    def to_bin(self):
   27.93 +        offset = self.node.tree.stroffset(self.name)
   27.94 +        return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \
   27.95 +            + _pad(self.value, 4)
   27.96 +
   27.97 +class _Node:
   27.98 +    def __init__(self, tree, name):
   27.99 +        self.tree = tree
  27.100 +        self.name = name
  27.101 +        self.props = {}
  27.102 +        self.children = {}
  27.103 +        self.phandle = 0
  27.104 +
  27.105 +    def __str__(self):
  27.106 +        propstrs = [ _indent(prop) for prop in self.props.values() ]
  27.107 +        childstrs = [ _indent(child) for child in self.children.values() ]
  27.108 +        return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs),
  27.109 +            '\n'.join(childstrs))
  27.110 +
  27.111 +    def to_bin(self):
  27.112 +        name = _pad(self.name + '\0', 4)
  27.113 +        return struct.pack('>I', _OF_DT_BEGIN_NODE) + \
  27.114 +                name + \
  27.115 +                _bincat(self.props.values()) + \
  27.116 +                _bincat(self.children.values()) + \
  27.117 +                struct.pack('>I', _OF_DT_END_NODE)
  27.118 +
  27.119 +    def addprop(self, propname, *cells):
  27.120 +        '''setprop with duplicate error-checking.'''
  27.121 +        if propname in self.props:
  27.122 +            raise AttributeError('%s/%s already exists' % (self.name, propname))
  27.123 +        self.setprop(propname, *cells)
  27.124 +
  27.125 +    def setprop(self, propname, *cells):
  27.126 +        self.props[propname] = _Property(self, propname, _bincat(cells))
  27.127 +
  27.128 +    def addnode(self, nodename):
  27.129 +        '''newnode with duplicate error-checking.'''
  27.130 +        if nodename in self.children:
  27.131 +            raise AttributeError('%s/%s already exists' % (self.name, nodename))
  27.132 +        return self.newnode(nodename)
  27.133 +
  27.134 +    def newnode(self, nodename):
  27.135 +        node = _Node(self.tree, nodename)
  27.136 +        self.children[nodename] = node
  27.137 +        return node
  27.138 +
  27.139 +    def getprop(self, propname):
  27.140 +        return self.props[propname]
  27.141 +
  27.142 +    def getchild(self, nodename):
  27.143 +        return self.children[nodename]
  27.144 +
  27.145 +    def get_phandle(self):
  27.146 +        if self.phandle:
  27.147 +            return self.phandle
  27.148 +        self.phandle = self.tree.alloc_phandle()
  27.149 +        self.addprop('linux,phandle', self.phandle)
  27.150 +        return self.phandle
  27.151 +
  27.152 +class _Header:
  27.153 +    def __init__(self):
  27.154 +        self.magic = 0
  27.155 +        self.totalsize = 0
  27.156 +        self.off_dt_struct = 0
  27.157 +        self.off_dt_strings = 0
  27.158 +        self.off_mem_rsvmap = 0
  27.159 +        self.version = 0
  27.160 +        self.last_comp_version = 0
  27.161 +        self.boot_cpuid_phys = 0
  27.162 +        self.size_dt_strings = 0
  27.163 +    def to_bin(self):
  27.164 +        return struct.pack('>9I',
  27.165 +            self.magic,
  27.166 +            self.totalsize,
  27.167 +            self.off_dt_struct,
  27.168 +            self.off_dt_strings,
  27.169 +            self.off_mem_rsvmap,
  27.170 +            self.version,
  27.171 +            self.last_comp_version,
  27.172 +            self.boot_cpuid_phys,
  27.173 +            self.size_dt_strings)
  27.174 +
  27.175 +class _StringBlock:
  27.176 +    def __init__(self):
  27.177 +        self.table = []
  27.178 +    def to_bin(self):
  27.179 +        return _bincat(self.table, '\0') + '\0'
  27.180 +    def add(self, str):
  27.181 +        self.table.append(str)
  27.182 +    def getoffset(self, str):
  27.183 +        return self.to_bin().index(str + '\0')
  27.184 +
  27.185 +class Tree(_Node):
  27.186 +    def __init__(self):
  27.187 +        self.last_phandle = 0
  27.188 +        self.strings = _StringBlock()
  27.189 +        self.reserved = [(0, 0)]
  27.190 +        _Node.__init__(self, self, '\0')
  27.191 +
  27.192 +    def alloc_phandle(self):
  27.193 +        self.last_phandle += 1
  27.194 +        return self.last_phandle
  27.195 +
  27.196 +    def stradd(self, str):
  27.197 +        return self.strings.add(str)
  27.198 +
  27.199 +    def stroffset(self, str):
  27.200 +        return self.strings.getoffset(str)
  27.201 +
  27.202 +    def reserve(self, start, len):
  27.203 +        self.reserved.insert(0, (start, len))
  27.204 +
  27.205 +    def to_bin(self):
  27.206 +        # layout:
  27.207 +        #   header
  27.208 +        #   reservation map
  27.209 +        #   string block
  27.210 +        #   data block
  27.211 +
  27.212 +        datablock = _Node.to_bin(self)
  27.213 +
  27.214 +        r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ]
  27.215 +        reserved = _bincat(r)
  27.216 +
  27.217 +        strblock = _pad(self.strings.to_bin(), 4)
  27.218 +        strblocklen = len(strblock)
  27.219 +
  27.220 +        header = _Header()
  27.221 +        header.magic = _OF_DT_HEADER
  27.222 +        header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8)
  27.223 +        header.off_dt_strings = header.off_mem_rsvmap + len(reserved)
  27.224 +        header.off_dt_struct = header.off_dt_strings + strblocklen
  27.225 +        header.version = 0x10
  27.226 +        header.last_comp_version = 0x10
  27.227 +        header.boot_cpuid_phys = 0
  27.228 +        header.size_dt_strings = strblocklen
  27.229 +
  27.230 +        payload = reserved + \
  27.231 +                strblock + \
  27.232 +                datablock + \
  27.233 +                struct.pack('>I', _OF_DT_END)
  27.234 +        header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
  27.235 +        return _pad(header.to_bin(), 8) + payload
  27.236 +
  27.237 +_host_devtree_root = '/proc/device-tree'
  27.238 +def _getprop(propname):
  27.239 +    '''Extract a property from the system's device tree.'''
  27.240 +    f = file(os.path.join(_host_devtree_root, propname), 'r')
  27.241 +    data = f.read()
  27.242 +    f.close()
  27.243 +    return data
  27.244 +
  27.245 +def _copynode(node, dirpath, propfilter):
  27.246 +    '''Extract all properties from a node in the system's device tree.'''
  27.247 +    dirents = os.listdir(dirpath)
  27.248 +    for dirent in dirents:
  27.249 +        fullpath = os.path.join(dirpath, dirent)
  27.250 +        st = os.lstat(fullpath)
  27.251 +        if stat.S_ISDIR(st.st_mode):
  27.252 +            child = node.addnode(dirent)
  27.253 +            _copytree(child, fullpath, propfilter)
  27.254 +        elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
  27.255 +            node.addprop(dirent, _getprop(fullpath))
  27.256 +
  27.257 +def _copytree(node, dirpath, propfilter):
  27.258 +    path = os.path.join(_host_devtree_root, dirpath)
  27.259 +    _copynode(node, path, propfilter)
  27.260 +
  27.261 +def build(imghandler):
  27.262 +    '''Construct a device tree by combining the domain's configuration and
  27.263 +    the host's device tree.'''
  27.264 +    root = Tree()
  27.265 +
  27.266 +    # 4 pages: start_info, console, store, shared_info
  27.267 +    root.reserve(0x3ffc000, 0x4000)
  27.268 +
  27.269 +    root.addprop('device_type', 'chrp-but-not-really\0')
  27.270 +    root.addprop('#size-cells', 2)
  27.271 +    root.addprop('#address-cells', 2)
  27.272 +    root.addprop('model', 'Momentum,Maple-D\0')
  27.273 +    root.addprop('compatible', 'Momentum,Maple\0')
  27.274 +
  27.275 +    xen = root.addnode('xen')
  27.276 +    xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
  27.277 +    xen.addprop('version', 'Xen-3.0-unstable\0')
  27.278 +    xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
  27.279 +    xen.addprop('domain-name', imghandler.vm.getName() + '\0')
  27.280 +    xencons = xen.addnode('console')
  27.281 +    xencons.addprop('interrupts', 1, 0)
  27.282 +
  27.283 +    # XXX split out RMA node
  27.284 +    mem = root.addnode('memory@0')
  27.285 +    totalmem = imghandler.vm.getMemoryTarget() * 1024
  27.286 +    mem.addprop('reg', 0, 0, 0, totalmem)
  27.287 +    mem.addprop('device_type', 'memory\0')
  27.288 +
  27.289 +    cpus = root.addnode('cpus')
  27.290 +    cpus.addprop('smp-enabled')
  27.291 +    cpus.addprop('#size-cells', 0)
  27.292 +    cpus.addprop('#address-cells', 1)
  27.293 +
  27.294 +    # Copy all properties the system firmware gave us, except for 'linux,'
  27.295 +    # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are
  27.296 +    # identical...
  27.297 +    cpu0 = None
  27.298 +    def _nolinuxprops(fullpath):
  27.299 +        return not os.path.basename(fullpath).startswith('linux,')
  27.300 +    for i in range(imghandler.vm.getVCpuCount()):
  27.301 +        cpu = cpus.addnode('PowerPC,970@0')
  27.302 +        _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
  27.303 +        # and then overwrite what we need to
  27.304 +        pft_size = imghandler.vm.info.get('pft-size', 0x14)
  27.305 +        cpu.setprop('ibm,pft-size', 0, pft_size)
  27.306 +
  27.307 +        # set default CPU
  27.308 +        if cpu0 == None:
  27.309 +            cpu0 = cpu
  27.310 +
  27.311 +    chosen = root.addnode('chosen')
  27.312 +    chosen.addprop('cpu', cpu0.get_phandle())
  27.313 +    chosen.addprop('memory', mem.get_phandle())
  27.314 +    chosen.addprop('linux,stdout-path', '/xen/console\0')
  27.315 +    chosen.addprop('interrupt-controller', xen.get_phandle())
  27.316 +    chosen.addprop('bootargs', imghandler.cmdline + '\0')
  27.317 +    # xc_linux_load.c will overwrite these 64-bit properties later
  27.318 +    chosen.addprop('linux,initrd-start', 0, 0)
  27.319 +    chosen.addprop('linux,initrd-end', 0, 0)
  27.320 +
  27.321 +    if 1:
  27.322 +        f = file('/tmp/domU.dtb', 'w')
  27.323 +        f.write(root.to_bin())
  27.324 +        f.close()
  27.325 +
  27.326 +    return root
    28.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Fri Sep 01 12:52:12 2006 -0600
    28.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Fri Sep 01 13:04:02 2006 -0600
    28.3 @@ -161,10 +161,12 @@ def restore(xd, fd):
    28.4          if handler.store_mfn is None or handler.console_mfn is None:
    28.5              raise XendError('Could not read store/console MFN')
    28.6  
    28.7 +        #Block until src closes connection
    28.8 +        os.read(fd, 1)
    28.9          dominfo.unpause()
   28.10 -
   28.11 +        
   28.12          dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
   28.13 -
   28.14 +        
   28.15          return dominfo
   28.16      except:
   28.17          dominfo.destroy()
    29.1 --- a/tools/python/xen/xend/XendDomain.py	Fri Sep 01 12:52:12 2006 -0600
    29.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Sep 01 13:04:02 2006 -0600
    29.3 @@ -431,7 +431,8 @@ class XendDomain:
    29.4          sock.send("receive\n")
    29.5          sock.recv(80)
    29.6          XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
    29.7 -
    29.8 +        dominfo.testDeviceComplete()
    29.9 +        sock.close()
   29.10  
   29.11      def domain_save(self, domid, dst):
   29.12          """Start saving a domain to file.
    30.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 01 12:52:12 2006 -0600
    30.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 01 13:04:02 2006 -0600
    30.3 @@ -30,7 +30,6 @@ import string
    30.4  import time
    30.5  import threading
    30.6  import os
    30.7 -import math
    30.8  
    30.9  import xen.lowlevel.xc
   30.10  from xen.util import asserts
   30.11 @@ -703,6 +702,9 @@ class XendDomainInfo:
   30.12                  if security[idx][0] == 'ssidref':
   30.13                      to_store['security/ssidref'] = str(security[idx][1])
   30.14  
   30.15 +        if not self.readVm('xend/restart_count'):
   30.16 +            to_store['xend/restart_count'] = str(0)
   30.17 +
   30.18          log.debug("Storing VM details: %s", to_store)
   30.19  
   30.20          self.writeVm(to_store)
   30.21 @@ -824,6 +826,9 @@ class XendDomainInfo:
   30.22      def setResume(self, state):
   30.23          self.info['resume'] = state
   30.24  
   30.25 +    def getRestartCount(self):
   30.26 +        return self.readVm('xend/restart_count')
   30.27 +
   30.28      def refreshShutdown(self, xeninfo = None):
   30.29          # If set at the end of this method, a restart is required, with the
   30.30          # given reason.  This restart has to be done out of the scope of
   30.31 @@ -1280,34 +1285,28 @@ class XendDomainInfo:
   30.32                  for v in range(0, self.info['max_vcpu_id']+1):
   30.33                      xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
   30.34  
   30.35 -            # set domain maxmem in KiB
   30.36 -            xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024)
   30.37 +            # set memory limit
   30.38 +            maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
   30.39 +            xc.domain_setmaxmem(self.domid, maxmem)
   30.40  
   30.41 -            m = self.image.getDomainMemory(self.info['memory'] * 1024)
   30.42 +            mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
   30.43  
   30.44              # get the domain's shadow memory requirement
   30.45 -            sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0))
   30.46 -            if self.info['shadow_memory'] > sm:
   30.47 -                sm = self.info['shadow_memory']
   30.48 +            shadow_kb = self.image.getRequiredShadowMemory(mem_kb)
   30.49 +            shadow_kb_req = self.info['shadow_memory'] * 1024
   30.50 +            if shadow_kb_req > shadow_kb:
   30.51 +                shadow_kb = shadow_kb_req
   30.52 +            shadow_mb = (shadow_kb + 1023) / 1024
   30.53  
   30.54              # Make sure there's enough RAM available for the domain
   30.55 -            balloon.free(m + sm * 1024)
   30.56 +            balloon.free(mem_kb + shadow_mb * 1024)
   30.57  
   30.58              # Set up the shadow memory
   30.59 -            sm = xc.shadow_mem_control(self.domid, mb=sm)
   30.60 -            self.info['shadow_memory'] = sm
   30.61 +            shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb)
   30.62 +            self.info['shadow_memory'] = shadow_cur
   30.63  
   30.64 -            init_reservation = self.info['memory'] * 1024
   30.65 -            if os.uname()[4] in ('ia64', 'ppc64'):
   30.66 -                # Workaround for architectures that don't yet support
   30.67 -                # ballooning.
   30.68 -                init_reservation = m
   30.69 -                # Following line from xiantao.zhang@intel.com
   30.70 -                # Needed for IA64 until supports ballooning -- okay for PPC64?
   30.71 -                xc.domain_setmaxmem(self.domid, m)
   30.72 -
   30.73 -            xc.domain_memory_increase_reservation(self.domid, init_reservation,
   30.74 -                                                  0, 0)
   30.75 +            # initial memory allocation
   30.76 +            xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
   30.77  
   30.78              self.createChannels()
   30.79  
   30.80 @@ -1495,6 +1494,21 @@ class XendDomainInfo:
   30.81              if rc != 0:
   30.82                  raise XendError("Device of type '%s' refuses migration." % n)
   30.83  
   30.84 +    def testDeviceComplete(self):
   30.85 +        """ For Block IO migration safety we must ensure that
   30.86 +        the device has shutdown correctly, i.e. all blocks are
   30.87 +        flushed to disk
   30.88 +        """
   30.89 +        while True:
   30.90 +            test = 0
   30.91 +            for i in self.getDeviceController('vbd').deviceIDs():
   30.92 +                test = 1
   30.93 +                log.info("Dev %s still active, looping...", i)
   30.94 +                time.sleep(0.1)
   30.95 +                
   30.96 +            if test == 0:
   30.97 +                break
   30.98 +
   30.99      def migrateDevices(self, network, dst, step, domName=''):
  30.100          """Notify the devices about migration
  30.101          """
  30.102 @@ -1615,6 +1629,9 @@ class XendDomainInfo:
  30.103              try:
  30.104                  new_dom = XendDomain.instance().domain_create(config)
  30.105                  new_dom.unpause()
  30.106 +                rst_cnt = self.readVm('xend/restart_count')
  30.107 +                rst_cnt = int(rst_cnt) + 1
  30.108 +                self.writeVm('xend/restart_count', str(rst_cnt))
  30.109                  new_dom.removeVm(RESTART_IN_PROGRESS)
  30.110              except:
  30.111                  if new_dom:
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/tools/python/xen/xend/arch.py	Fri Sep 01 13:04:02 2006 -0600
    31.3 @@ -0,0 +1,32 @@
    31.4 +#!/usr/bin/env python
    31.5 +#
    31.6 +# This library is free software; you can redistribute it and/or
    31.7 +# modify it under the terms of version 2.1 of the GNU Lesser General Public
    31.8 +# License as published by the Free Software Foundation.
    31.9 +#
   31.10 +# This library is distributed in the hope that it will be useful,
   31.11 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
   31.12 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   31.13 +# Lesser General Public License for more details.
   31.14 +#
   31.15 +# You should have received a copy of the GNU Lesser General Public
   31.16 +# License along with this library; if not, write to the Free Software
   31.17 +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   31.18 +#
   31.19 +# Copyright (C) IBM Corp. 2006
   31.20 +#
   31.21 +# Authors: Hollis Blanchard <hollisb@us.ibm.com>
   31.22 +
   31.23 +import os
   31.24 +
   31.25 +_types = {
   31.26 +    "i386": "x86",
   31.27 +    "i486": "x86",
   31.28 +    "i586": "x86",
   31.29 +    "i686": "x86",
   31.30 +    "x86_64": "x86",
   31.31 +    "ia64": "ia64",
   31.32 +    "ppc": "powerpc",
   31.33 +    "ppc64": "powerpc",
   31.34 +}
   31.35 +type = _types.get(os.uname()[4], "unknown")
    32.1 --- a/tools/python/xen/xend/image.py	Fri Sep 01 12:52:12 2006 -0600
    32.2 +++ b/tools/python/xen/xend/image.py	Fri Sep 01 13:04:02 2006 -0600
    32.3 @@ -27,6 +27,8 @@ from xen.xend.XendError import VmError
    32.4  from xen.xend.XendLogging import log
    32.5  from xen.xend.server.netif import randomMAC
    32.6  from xen.xend.xenstore.xswatch import xswatch
    32.7 +from xen.xend import arch
    32.8 +from xen.xend import FlatDeviceTree
    32.9  
   32.10  
   32.11  xc = xen.lowlevel.xc.xc()
   32.12 @@ -141,19 +143,10 @@ class ImageHandler:
   32.13              raise VmError('Building domain failed: ostype=%s dom=%d err=%s'
   32.14                            % (self.ostype, self.vm.getDomid(), str(result)))
   32.15  
   32.16 -
   32.17 -    def getDomainMemory(self, mem_kb):
   32.18 -        """@return The memory required, in KiB, by the domain to store the
   32.19 -        given amount, also in KiB."""
   32.20 -        if os.uname()[4] != 'ia64':
   32.21 -            # A little extra because auto-ballooning is broken w.r.t. HVM
   32.22 -            # guests. Also, slack is necessary for live migration since that
   32.23 -            # uses shadow page tables.
   32.24 -            if 'hvm' in xc.xeninfo()['xen_caps']:
   32.25 -                mem_kb += 4*1024;
   32.26 +    def getRequiredMemory(self, mem_kb):
   32.27          return mem_kb
   32.28  
   32.29 -    def getDomainShadowMemory(self, mem_kb):
   32.30 +    def getRequiredShadowMemory(self, mem_kb):
   32.31          """@return The minimum shadow memory required, in KiB, for a domain 
   32.32          with mem_kb KiB of RAM."""
   32.33          # PV domains don't need any shadow memory
   32.34 @@ -197,9 +190,39 @@ class LinuxImageHandler(ImageHandler):
   32.35                                ramdisk        = self.ramdisk,
   32.36                                features       = self.vm.getFeatures())
   32.37  
   32.38 -class HVMImageHandler(ImageHandler):
   32.39 +class PPC_LinuxImageHandler(LinuxImageHandler):
   32.40 +
   32.41 +    ostype = "linux"
   32.42 +
   32.43 +    def configure(self, imageConfig, deviceConfig):
   32.44 +        LinuxImageHandler.configure(self, imageConfig, deviceConfig)
   32.45 +        self.imageConfig = imageConfig
   32.46 +
   32.47 +    def buildDomain(self):
   32.48 +        store_evtchn = self.vm.getStorePort()
   32.49 +        console_evtchn = self.vm.getConsolePort()
   32.50  
   32.51 -    ostype = "hvm"
   32.52 +        log.debug("dom            = %d", self.vm.getDomid())
   32.53 +        log.debug("image          = %s", self.kernel)
   32.54 +        log.debug("store_evtchn   = %d", store_evtchn)
   32.55 +        log.debug("console_evtchn = %d", console_evtchn)
   32.56 +        log.debug("cmdline        = %s", self.cmdline)
   32.57 +        log.debug("ramdisk        = %s", self.ramdisk)
   32.58 +        log.debug("vcpus          = %d", self.vm.getVCpuCount())
   32.59 +        log.debug("features       = %s", self.vm.getFeatures())
   32.60 +
   32.61 +        devtree = FlatDeviceTree.build(self)
   32.62 +
   32.63 +        return xc.linux_build(dom            = self.vm.getDomid(),
   32.64 +                              image          = self.kernel,
   32.65 +                              store_evtchn   = store_evtchn,
   32.66 +                              console_evtchn = console_evtchn,
   32.67 +                              cmdline        = self.cmdline,
   32.68 +                              ramdisk        = self.ramdisk,
   32.69 +                              features       = self.vm.getFeatures(),
   32.70 +                              arch_args      = devtree.to_bin())
   32.71 +
   32.72 +class HVMImageHandler(ImageHandler):
   32.73  
   32.74      def configure(self, imageConfig, deviceConfig):
   32.75          ImageHandler.configure(self, imageConfig, deviceConfig)
   32.76 @@ -282,7 +305,7 @@ class HVMImageHandler(ImageHandler):
   32.77          for (name, info) in deviceConfig:
   32.78              if name == 'vbd':
   32.79                  uname = sxp.child_value(info, 'uname')
   32.80 -                if 'file:' in uname:
   32.81 +                if uname is not None and 'file:' in uname:
   32.82                      (_, vbdparam) = string.split(uname, ':', 1)
   32.83                      if not os.path.isfile(vbdparam):
   32.84                          raise VmError('Disk image does not exist: %s' %
   32.85 @@ -355,32 +378,6 @@ class HVMImageHandler(ImageHandler):
   32.86          os.waitpid(self.pid, 0)
   32.87          self.pid = 0
   32.88  
   32.89 -    def getDomainMemory(self, mem_kb):
   32.90 -        """@see ImageHandler.getDomainMemory"""
   32.91 -        if os.uname()[4] == 'ia64':
   32.92 -            page_kb = 16
   32.93 -            # ROM size for guest firmware, ioreq page and xenstore page
   32.94 -            extra_pages = 1024 + 2
   32.95 -        else:
   32.96 -            page_kb = 4
   32.97 -            # This was derived emperically:
   32.98 -            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
   32.99 -            #   + 4 to avoid low-memory condition
  32.100 -            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
  32.101 -            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
  32.102 -        return mem_kb + extra_pages * page_kb
  32.103 -
  32.104 -    def getDomainShadowMemory(self, mem_kb):
  32.105 -        """@return The minimum shadow memory required, in KiB, for a domain 
  32.106 -        with mem_kb KiB of RAM."""
  32.107 -        if os.uname()[4] in ('ia64', 'ppc64'):
  32.108 -            # Explicit shadow memory is not a concept 
  32.109 -            return 0
  32.110 -        else:
  32.111 -            # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
  32.112 -            # the minimum that Xen would allocate if no value were given.
  32.113 -            return 1024 * self.vm.getVCpuCount() + mem_kb / 256
  32.114 -
  32.115      def register_shutdown_watch(self):
  32.116          """ add xen store watch on control/shutdown """
  32.117          self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
  32.118 @@ -417,15 +414,51 @@ class HVMImageHandler(ImageHandler):
  32.119  
  32.120          return 1 # Keep watching
  32.121  
  32.122 -"""Table of image handler classes for virtual machine images.  Indexed by
  32.123 -image type.
  32.124 -"""
  32.125 -imageHandlerClasses = {}
  32.126 +class IA64_HVM_ImageHandler(HVMImageHandler):
  32.127 +
  32.128 +    ostype = "hvm"
  32.129 +
  32.130 +    def getRequiredMemory(self, mem_kb):
  32.131 +        page_kb = 16
  32.132 +        # ROM size for guest firmware, ioreq page and xenstore page
  32.133 +        extra_pages = 1024 + 2
  32.134 +        return mem_kb + extra_pages * page_kb
  32.135 +
  32.136 +    def getRequiredShadowMemory(self, mem_kb):
  32.137 +        # Explicit shadow memory is not a concept 
  32.138 +        return 0
  32.139 +
  32.140 +class X86_HVM_ImageHandler(HVMImageHandler):
  32.141 +
  32.142 +    ostype = "hvm"
  32.143  
  32.144 +    def getRequiredMemory(self, mem_kb):
  32.145 +        page_kb = 4
  32.146 +        # This was derived emperically:
  32.147 +        #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
  32.148 +        #   + 4 to avoid low-memory condition
  32.149 +        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
  32.150 +        extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
  32.151 +        return mem_kb + extra_pages * page_kb
  32.152  
  32.153 -for h in LinuxImageHandler, HVMImageHandler:
  32.154 -    imageHandlerClasses[h.ostype] = h
  32.155 +    def getRequiredShadowMemory(self, mem_kb):
  32.156 +        # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
  32.157 +        # the minimum that Xen would allocate if no value were given.
  32.158 +        return 1024 * self.vm.getVCpuCount() + mem_kb / 256
  32.159  
  32.160 +_handlers = {
  32.161 +    "powerpc": {
  32.162 +        "linux": PPC_LinuxImageHandler,
  32.163 +    },
  32.164 +    "ia64": {
  32.165 +        "linux": LinuxImageHandler,
  32.166 +        "hvm": IA64_HVM_ImageHandler,
  32.167 +    },
  32.168 +    "x86": {
  32.169 +        "linux": LinuxImageHandler,
  32.170 +        "hvm": X86_HVM_ImageHandler,
  32.171 +    },
  32.172 +}
  32.173  
  32.174  def findImageHandlerClass(image):
  32.175      """Find the image handler class for an image config.
  32.176 @@ -433,10 +466,10 @@ def findImageHandlerClass(image):
  32.177      @param image config
  32.178      @return ImageHandler subclass or None
  32.179      """
  32.180 -    ty = sxp.name(image)
  32.181 -    if ty is None:
  32.182 +    type = sxp.name(image)
  32.183 +    if type is None:
  32.184          raise VmError('missing image type')
  32.185 -    imageClass = imageHandlerClasses.get(ty)
  32.186 -    if imageClass is None:
  32.187 -        raise VmError('unknown image type: ' + ty)
  32.188 -    return imageClass
  32.189 +    try:
  32.190 +        return _handlers[arch.type][type]
  32.191 +    except KeyError:
  32.192 +        raise VmError('unknown image type: ' + type)
    33.1 --- a/tools/python/xen/xend/server/DevController.py	Fri Sep 01 12:52:12 2006 -0600
    33.2 +++ b/tools/python/xen/xend/server/DevController.py	Fri Sep 01 13:04:02 2006 -0600
    33.3 @@ -207,6 +207,9 @@ class DevController:
    33.4  
    33.5          devid = int(devid)
    33.6  
    33.7 +        # Modify online status /before/ updating state (latter is watched by
    33.8 +        # drivers, so this ordering avoids a race).
    33.9 +        self.writeBackend(devid, 'online', "0")
   33.10          self.writeBackend(devid, 'state', str(xenbusState['Closing']))
   33.11  
   33.12  
   33.13 @@ -406,7 +409,8 @@ class DevController:
   33.14              'domain' : self.vm.getName(),
   33.15              'frontend' : frontpath,
   33.16              'frontend-id' : "%i" % self.vm.getDomid(),
   33.17 -            'state' : str(xenbusState['Initialising'])
   33.18 +            'state' : str(xenbusState['Initialising']),
   33.19 +            'online' : "1"
   33.20              })
   33.21  
   33.22          return (backpath, frontpath)
    34.1 --- a/tools/python/xen/xend/server/XMLRPCServer.py	Fri Sep 01 12:52:12 2006 -0600
    34.2 +++ b/tools/python/xen/xend/server/XMLRPCServer.py	Fri Sep 01 13:04:02 2006 -0600
    34.3 @@ -78,7 +78,8 @@ def get_log():
    34.4  methods = ['device_create', 'device_configure', 'destroyDevice',
    34.5             'getDeviceSxprs',
    34.6             'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
    34.7 -           'send_sysrq', 'getVCPUInfo', 'waitForDevices']
    34.8 +           'send_sysrq', 'getVCPUInfo', 'waitForDevices',
    34.9 +           'getRestartCount']
   34.10  
   34.11  exclude = ['domain_create', 'domain_restore']
   34.12  
    35.1 --- a/tools/python/xen/xend/server/blkif.py	Fri Sep 01 12:52:12 2006 -0600
    35.2 +++ b/tools/python/xen/xend/server/blkif.py	Fri Sep 01 13:04:02 2006 -0600
    35.3 @@ -52,10 +52,18 @@ class BlkifController(DevController):
    35.4          except ValueError:
    35.5              dev_type = "disk"
    35.6  
    35.7 -        try:
    35.8 -            (typ, params) = string.split(uname, ':', 1)
    35.9 -        except ValueError:
   35.10 -            (typ, params) = ("", "")
   35.11 +        if uname is None:
   35.12 +            if dev_type == 'cdrom':
   35.13 +                (typ, params) = ("", "")
   35.14 +            else:
   35.15 +                raise VmError(
   35.16 +                    'Block device must have physical details specified')
   35.17 +        else:
   35.18 +            try:
   35.19 +                (typ, params) = string.split(uname, ':', 1)
   35.20 +            except ValueError:
   35.21 +                (typ, params) = ("", "")
   35.22 +
   35.23          back = { 'dev'    : dev,
   35.24                   'type'   : typ,
   35.25                   'params' : params,
    36.1 --- a/tools/python/xen/xm/migrate.py	Fri Sep 01 12:52:12 2006 -0600
    36.2 +++ b/tools/python/xen/xm/migrate.py	Fri Sep 01 13:04:02 2006 -0600
    36.3 @@ -57,7 +57,8 @@ def main(argv):
    36.4          opts.usage()
    36.5          return
    36.6      if len(args) != 2:
    36.7 -        opts.err('Invalid arguments: ' + str(args))
    36.8 +        opts.usage()
    36.9 +        sys.exit(1)
   36.10      dom = args[0]
   36.11      dst = args[1]
   36.12      server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource, opts.vals.port)
    37.1 --- a/tools/python/xen/xm/shutdown.py	Fri Sep 01 12:52:12 2006 -0600
    37.2 +++ b/tools/python/xen/xm/shutdown.py	Fri Sep 01 13:04:02 2006 -0600
    37.3 @@ -48,21 +48,48 @@ gopts.opt('reboot', short='R',
    37.4            fn=set_true, default=0,
    37.5            use='Shutdown and reboot.')
    37.6  
    37.7 +def wait_reboot(opts, doms, rcs):
    37.8 +    while doms:
    37.9 +        alive = server.xend.domains(0)
   37.10 +        reboot = []
   37.11 +        for d in doms:
   37.12 +            if d in alive:
   37.13 +                rc = server.xend.domain.getRestartCount(d)
   37.14 +                if rc == rcs[d]: continue
   37.15 +                reboot.append(d)
   37.16 +            else:
   37.17 +                opts.info("Domain %s destroyed for failed in rebooting" % d)
   37.18 +                doms.remove(d)
   37.19 +        for d in reboot:
   37.20 +            opts.info("Domain %s rebooted" % d)
   37.21 +            doms.remove(d)
   37.22 +        time.sleep(1)
   37.23 +    opts.info("All domains rebooted")
   37.24 +
   37.25 +def wait_shutdown(opts, doms):
   37.26 +    while doms:
   37.27 +        alive = server.xend.domains(0)
   37.28 +        dead = []
   37.29 +        for d in doms:
   37.30 +            if d in alive: continue
   37.31 +            dead.append(d)
   37.32 +        for d in dead:
   37.33 +            opts.info("Domain %s terminated" % d)
   37.34 +            doms.remove(d)
   37.35 +        time.sleep(1)
   37.36 +    opts.info("All domains terminated")
   37.37 +
   37.38  def shutdown(opts, doms, mode, wait):
   37.39 +    rcs = {}
   37.40      for d in doms:
   37.41 +        rcs[d] = server.xend.domain.getRestartCount(d)
   37.42          server.xend.domain.shutdown(d, mode)
   37.43 +
   37.44      if wait:
   37.45 -        while doms:
   37.46 -            alive = server.xend.domains(0)
   37.47 -            dead = []
   37.48 -            for d in doms:
   37.49 -                if d in alive: continue
   37.50 -                dead.append(d)
   37.51 -            for d in dead:
   37.52 -                opts.info("Domain %s terminated" % d)
   37.53 -                doms.remove(d)
   37.54 -            time.sleep(1)
   37.55 -        opts.info("All domains terminated")
   37.56 +        if mode == 'reboot':
   37.57 +            wait_reboot(opts, doms, rcs)
   37.58 +        else:
   37.59 +            wait_shutdown(opts, doms)
   37.60  
   37.61  def shutdown_mode(opts):
   37.62      if opts.vals.halt and opts.vals.reboot:
    38.1 --- a/xen/arch/ia64/xen/dom0_ops.c	Fri Sep 01 12:52:12 2006 -0600
    38.2 +++ b/xen/arch/ia64/xen/dom0_ops.c	Fri Sep 01 13:04:02 2006 -0600
    38.3 @@ -40,8 +40,8 @@ long arch_do_domctl(xen_domctl_t *op, XE
    38.4      {
    38.5          unsigned long i;
    38.6          struct domain *d = find_domain_by_id(op->domain);
    38.7 -        unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
    38.8 -        unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
    38.9 +        unsigned long start_page = op->u.getmemlist.start_pfn;
   38.10 +        unsigned long nr_pages = op->u.getmemlist.max_pfns;
   38.11          unsigned long mfn;
   38.12  
   38.13          if ( d == NULL ) {
    39.1 --- a/xen/arch/powerpc/Makefile	Fri Sep 01 12:52:12 2006 -0600
    39.2 +++ b/xen/arch/powerpc/Makefile	Fri Sep 01 13:04:02 2006 -0600
    39.3 @@ -6,6 +6,7 @@ subdir-$(HAS_PPC64) += powerpc64
    39.4  subdir-y += papr
    39.5  
    39.6  obj-y += audit.o
    39.7 +obj-y += backtrace.o
    39.8  obj-y += bitops.o
    39.9  obj-y += boot_of.o
   39.10  obj-y += dart.o
   39.11 @@ -19,19 +20,21 @@ obj-y += exceptions.o
   39.12  obj-y += external.o
   39.13  obj-y += float.o
   39.14  obj-y += hcalls.o
   39.15 -obj-y += htab.o
   39.16  obj-y += iommu.o
   39.17  obj-y += irq.o
   39.18  obj-y += mambo.o
   39.19 +obj-y += memory.o
   39.20  obj-y += mm.o
   39.21  obj-y += mpic.o
   39.22  obj-y += mpic_init.o
   39.23  obj-y += of-devtree.o
   39.24  obj-y += of-devwalk.o
   39.25  obj-y += ofd_fixup.o
   39.26 +obj-y += ofd_fixup_memory.o
   39.27  obj-y += physdev.o
   39.28  obj-y += rtas.o
   39.29  obj-y += setup.o
   39.30 +obj-y += shadow.o
   39.31  obj-y += smp.o
   39.32  obj-y += time.o
   39.33  obj-y += usercopy.o
   39.34 @@ -47,6 +50,7 @@ obj-y += elf32.o
   39.35  # These are extra warnings like for the arch/ppc directory but may not
   39.36  # allow the rest of the tree to build.
   39.37  PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations
   39.38 +PPC_C_WARNINGS += -Wshadow
   39.39  CFLAGS += $(PPC_C_WARNINGS)
   39.40  
   39.41  LINK=0x400000
   39.42 @@ -91,8 +95,27 @@ boot_of.o: CFLAGS += -DCMDLINE="\"$(IMAG
   39.43  start.o: boot/start.S
   39.44  	$(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
   39.45  
   39.46 -$(TARGET)-syms: start.o $(ALL_OBJS) xen.lds
   39.47 -	$(CC) $(CFLAGS) $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds start.o $(ALL_OBJS) -o $@
   39.48 +TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
   39.49 +TARGET_OPTS += start.o $(ALL_OBJS)
   39.50 +
   39.51 +.xen-syms: start.o $(ALL_OBJS) xen.lds
   39.52 +	$(CC) $(CFLAGS) $(TARGET_OPTS) -o $@
   39.53 +
   39.54 +NM=$(CROSS_COMPILE)nm
   39.55 +new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
   39.56 +
   39.57 +ifeq ($(new_nm),y)
   39.58 +NM		:= $(NM) --synthetic
   39.59 +endif
   39.60 +
   39.61 +xen-syms.S: .xen-syms
   39.62 +	$(NM) -n $^ | $(BASEDIR)/tools/symbols > $@
   39.63 +
   39.64 +xen-syms.o: xen-syms.S
   39.65 +	$(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
   39.66 +
   39.67 +$(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
   39.68 +	$(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
   39.69  
   39.70  $(TARGET).bin: $(TARGET)-syms
   39.71  	$(CROSS_COMPILE)objcopy --output-target=binary $< $@
   39.72 @@ -122,4 +145,4 @@ dom0.bin: $(DOM0_IMAGE)
   39.73  
   39.74  clean::
   39.75  	$(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
   39.76 -	rm -f firmware firmware_image dom0.bin
   39.77 +	rm -f firmware firmware_image dom0.bin .xen-syms
    40.1 --- a/xen/arch/powerpc/Rules.mk	Fri Sep 01 12:52:12 2006 -0600
    40.2 +++ b/xen/arch/powerpc/Rules.mk	Fri Sep 01 13:04:02 2006 -0600
    40.3 @@ -4,7 +4,7 @@ CC := $(CROSS_COMPILE)gcc
    40.4  LD := $(CROSS_COMPILE)ld
    40.5  
    40.6  # These are goodess that applies to all source.
    40.7 -C_WARNINGS := -Wpointer-arith -Wredundant-decls
    40.8 +C_WARNINGS := -Wredundant-decls
    40.9  
   40.10  # _no_ common code can have packed data structures or we are in touble.
   40.11  C_WARNINGS += -Wpacked
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/xen/arch/powerpc/backtrace.c	Fri Sep 01 13:04:02 2006 -0600
    41.3 @@ -0,0 +1,193 @@
    41.4 +/*
    41.5 + * Routines providing a simple monitor for use on the PowerMac.
    41.6 + *
    41.7 + * Copyright (C) 1996-2005 Paul Mackerras.
    41.8 + *
    41.9 + *      This program is free software; you can redistribute it and/or
   41.10 + *      modify it under the terms of the GNU General Public License
   41.11 + *      as published by the Free Software Foundation; either version
   41.12 + *      2 of the License, or (at your option) any later version.
   41.13 + */
   41.14 +
   41.15 +#include <xen/config.h>
   41.16 +#include <xen/lib.h>
   41.17 +#include <xen/console.h>
   41.18 +#include <xen/sched.h>
   41.19 +#include <xen/symbols.h>
   41.20 +
   41.21 +static char namebuf[KSYM_NAME_LEN+1];
   41.22 +
   41.23 +/* Shamelessly lifted from Linux Xmon try to keep pristene */
   41.24 +#ifdef __powerpc64__
   41.25 +#define LRSAVE_OFFSET		0x10
   41.26 +#define REG_FRAME_MARKER	0x7265677368657265ul	/* "regshere" */
   41.27 +#define MARKER_OFFSET		0x60
   41.28 +#define REGS_OFFSET		0x70
   41.29 +#define REG "%016lX"
   41.30 +#else
   41.31 +#define LRSAVE_OFFSET		4
   41.32 +#define REG_FRAME_MARKER	0x72656773
   41.33 +#define MARKER_OFFSET		8
   41.34 +#define REGS_OFFSET		16
   41.35 +#define REG "%08lX"
   41.36 +#endif
   41.37 +
   41.38 +#define TRAP(regs) ((regs)->entry_vector & ~0xF)
   41.39 +static int xmon_depth_to_print = 64;
   41.40 +
   41.41 +/* Very cheap human name for vector lookup. */
   41.42 +static
   41.43 +const char *getvecname(unsigned long vec)
   41.44 +{
   41.45 +	char *ret;
   41.46 +
   41.47 +	switch (vec) {
   41.48 +	case 0x100:	ret = "(System Reset)"; break;
   41.49 +	case 0x200:	ret = "(Machine Check)"; break;
   41.50 +	case 0x300:	ret = "(Data Access)"; break;
   41.51 +	case 0x380:	ret = "(Data SLB Access)"; break;
   41.52 +	case 0x400:	ret = "(Instruction Access)"; break;
   41.53 +	case 0x480:	ret = "(Instruction SLB Access)"; break;
   41.54 +	case 0x500:	ret = "(Hardware Interrupt)"; break;
   41.55 +	case 0x600:	ret = "(Alignment)"; break;
   41.56 +	case 0x700:	ret = "(Program Check)"; break;
   41.57 +	case 0x800:	ret = "(FPU Unavailable)"; break;
   41.58 +	case 0x900:	ret = "(Decrementer)"; break;
   41.59 +	case 0xc00:	ret = "(System Call)"; break;
   41.60 +	case 0xd00:	ret = "(Single Step)"; break;
   41.61 +	case 0xf00:	ret = "(Performance Monitor)"; break;
   41.62 +	case 0xf20:	ret = "(Altivec Unavailable)"; break;
   41.63 +	case 0x1300:	ret = "(Instruction Breakpoint)"; break;
   41.64 +	default: ret = "";
   41.65 +	}
   41.66 +	return ret;
   41.67 +}
   41.68 +
   41.69 +static int mread(unsigned long adrs, void *buf, int size)
   41.70 +{
   41.71 +    memcpy(buf, (void *)adrs, size);
   41.72 +    return size;
   41.73 +}
   41.74 +
   41.75 +static void get_function_bounds(unsigned long pc, unsigned long *startp,
   41.76 +				unsigned long *endp)
   41.77 +{
   41.78 +    unsigned long size, offset;
   41.79 +	const char *name;
   41.80 +
   41.81 +    *startp = *endp = 0;
   41.82 +	if (pc == 0)
   41.83 +		return;
   41.84 +
   41.85 +    name = symbols_lookup(pc, &size, &offset, namebuf);
   41.86 +    if (name != NULL) {
   41.87 +			*startp = pc - offset;
   41.88 +			*endp = pc - offset + size;
   41.89 +    }
   41.90 +}
   41.91 +    
   41.92 +/* Print an address in numeric and symbolic form (if possible) */
   41.93 +static void xmon_print_symbol(unsigned long address, const char *mid,
   41.94 +                              const char *after)
   41.95 +{
   41.96 +	const char *name = NULL;
   41.97 +	unsigned long offset, size;
   41.98 +
   41.99 +	printf(REG, address);
  41.100 +
  41.101 +    name = symbols_lookup(address, &size, &offset, namebuf);
  41.102 +	if (name) {
  41.103 +		printf("%s%s+%#lx/%#lx", mid, name, offset, size);
  41.104 +	}
  41.105 +	printf("%s", after);
  41.106 +}
  41.107 +
  41.108 +static void backtrace(
  41.109 +    unsigned long sp, unsigned long lr, unsigned long pc)
  41.110 +{
  41.111 +	unsigned long ip;
  41.112 +	unsigned long newsp;
  41.113 +	unsigned long marker;
  41.114 +	int count = 0;
  41.115 +	struct cpu_user_regs regs;
  41.116 +
  41.117 +	do {
  41.118 +		if (sp > xenheap_phys_end) {
  41.119 +			if (sp != 0)
  41.120 +				printf("SP (%lx) is not in xen space\n", sp);
  41.121 +			break;
  41.122 +		}
  41.123 +
  41.124 +		if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long))
  41.125 +		    || !mread(sp, &newsp, sizeof(unsigned long))) {
  41.126 +			printf("Couldn't read stack frame at %lx\n", sp);
  41.127 +			break;
  41.128 +		}
  41.129 +
  41.130 +		/*
  41.131 +		 * For the first stack frame, try to work out if
  41.132 +		 * LR and/or the saved LR value in the bottommost
  41.133 +		 * stack frame are valid.
  41.134 +		 */
  41.135 +		if ((pc | lr) != 0) {
  41.136 +			unsigned long fnstart, fnend;
  41.137 +			unsigned long nextip;
  41.138 +			int printip = 1;
  41.139 +
  41.140 +			get_function_bounds(pc, &fnstart, &fnend);
  41.141 +			nextip = 0;
  41.142 +			if (newsp > sp)
  41.143 +				mread(newsp + LRSAVE_OFFSET, &nextip,
  41.144 +				      sizeof(unsigned long));
  41.145 +			if (lr == ip) {
  41.146 +				if (lr >= xenheap_phys_end
  41.147 +				    || (fnstart <= lr && lr < fnend))
  41.148 +					printip = 0;
  41.149 +			} else if (lr == nextip) {
  41.150 +				printip = 0;
  41.151 +			} else if (lr < xenheap_phys_end
  41.152 +                       && !(fnstart <= lr && lr < fnend)) {
  41.153 +				printf("[link register   ] ");
  41.154 +				xmon_print_symbol(lr, " ", "\n");
  41.155 +			}
  41.156 +			if (printip) {
  41.157 +				printf("["REG"] ", sp);
  41.158 +				xmon_print_symbol(ip, " ", " (unreliable)\n");
  41.159 +			}
  41.160 +			pc = lr = 0;
  41.161 +
  41.162 +		} else {
  41.163 +			printf("["REG"] ", sp);
  41.164 +			xmon_print_symbol(ip, " ", "\n");
  41.165 +		}
  41.166 +
  41.167 +		/* Look for "regshere" marker to see if this is
  41.168 +		   an exception frame. */
  41.169 +		if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
  41.170 +		    && marker == REG_FRAME_MARKER) {
  41.171 +			if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
  41.172 +			    != sizeof(regs)) {
  41.173 +				printf("Couldn't read registers at %lx\n",
  41.174 +				       sp + REGS_OFFSET);
  41.175 +				break;
  41.176 +			}
  41.177 +            printf("--- Exception: %x %s at ", regs.entry_vector,
  41.178 +			       getvecname(TRAP(&regs)));
  41.179 +			pc = regs.pc;
  41.180 +			lr = regs.lr;
  41.181 +			xmon_print_symbol(pc, " ", "\n");
  41.182 +		}
  41.183 +
  41.184 +		if (newsp == 0)
  41.185 +			break;
  41.186 +        
  41.187 +		sp = newsp;
  41.188 +	} while (count++ < xmon_depth_to_print);
  41.189 +}
  41.190 +
  41.191 +void show_backtrace(ulong sp, ulong lr, ulong pc)
  41.192 +{
  41.193 +    console_start_sync();
  41.194 +    backtrace(sp, lr, pc);
  41.195 +    console_end_sync();
  41.196 +}
    42.1 --- a/xen/arch/powerpc/boot_of.c	Fri Sep 01 12:52:12 2006 -0600
    42.2 +++ b/xen/arch/powerpc/boot_of.c	Fri Sep 01 13:04:02 2006 -0600
    42.3 @@ -26,11 +26,15 @@
    42.4  #include <xen/spinlock.h>
    42.5  #include <xen/serial.h>
    42.6  #include <xen/time.h>
    42.7 +#include <xen/sched.h>
    42.8  #include <asm/page.h>
    42.9  #include <asm/io.h>
   42.10  #include "exceptions.h"
   42.11  #include "of-devtree.h"
   42.12  
   42.13 +/* Secondary processors use this for handshaking with main processor.  */
   42.14 +volatile unsigned int __spin_ack;
   42.15 +
   42.16  static ulong of_vec;
   42.17  static ulong of_msr;
   42.18  static int of_out;
   42.19 @@ -322,17 +326,18 @@ static void __init of_test(const char *o
   42.20      }
   42.21  }
   42.22  
   42.23 -static int __init of_claim(void * virt, u32 size)
   42.24 +static int __init of_claim(u32 virt, u32 size, u32 align)
   42.25  {
   42.26      int rets[1] = { OF_FAILURE };
   42.27      
   42.28 -    of_call("claim", 3, 1, rets, virt, size, 0/*align*/);
   42.29 +    of_call("claim", 3, 1, rets, virt, size, align);
   42.30      if (rets[0] == OF_FAILURE) {
   42.31 -        DBG("%s 0x%p 0x%08x -> FAIL\n", __func__, virt, size);
   42.32 +        DBG("%s 0x%08x 0x%08x  0x%08x -> FAIL\n", __func__, virt, size, align);
   42.33          return OF_FAILURE;
   42.34      }
   42.35  
   42.36 -    DBG("%s 0x%p 0x%08x -> 0x%x\n", __func__, virt, size, rets[0]);
   42.37 +    DBG("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
   42.38 +        rets[0]);
   42.39      return rets[0];
   42.40  }
   42.41  
   42.42 @@ -683,32 +688,53 @@ static int boot_of_fixup_chosen(void *me
   42.43  }
   42.44  
   42.45  static ulong space_base;
   42.46 -static ulong find_space(u32 size, ulong align, multiboot_info_t *mbi)
   42.47 +
   42.48 +/*
   42.49 + * The following function is necessary because we cannot depend on all
   42.50 + * FW to actually allocate us any space, so we look for it _hoping_
   42.51 + * that at least is will fail if we try to claim something that
   42.52 + * belongs to FW.  This hope does not seem to be true on some version
   42.53 + * of PIBS.
   42.54 + */
   42.55 +static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
   42.56  {
   42.57      memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
   42.58      ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
   42.59      ulong base;
   42.60  
   42.61 -    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%lx\n",
   42.62 +    if (size == 0)
   42.63 +        return 0;
   42.64 +
   42.65 +    if (align == 0)
   42.66 +        of_panic("cannot call %s() with align of 0\n", __func__);
   42.67 +
   42.68 +#ifdef BROKEN_CLAIM_WORKAROUND
   42.69 +    {
   42.70 +        static int broken_claim;
   42.71 +        if (!broken_claim) {
   42.72 +            /* just try and claim it to the FW chosen address */
   42.73 +            base = of_claim(0, size, align);
   42.74 +            if (base != OF_FAILURE)
   42.75 +                return base;
   42.76 +            of_printf("%s: Firmware does not allocate memory for you\n",
   42.77 +                      __func__);
   42.78 +            broken_claim = 1;
   42.79 +        }
   42.80 +    }
   42.81 +#endif
   42.82 +
   42.83 +    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%x\n",
   42.84                      __func__, space_base, eomem, size, align);
   42.85      base = ALIGN_UP(space_base, PAGE_SIZE);
   42.86 -    if ((base + size) >= 0x4000000) return 0;
   42.87 -    if (base + size > eomem) of_panic("not enough RAM\n");
   42.88  
   42.89 -    if (size == 0) return base;
   42.90 -    if (of_claim((void*)base, size) != OF_FAILURE) {
   42.91 -        space_base = base + size;
   42.92 -        return base;
   42.93 -    } else {
   42.94 -        for(base += 0x100000; (base+size) < 0x4000000; base += 0x100000) {
   42.95 -            of_printf("Trying 0x%016lx\n", base);
   42.96 -            if (of_claim((void*)base, size) != OF_FAILURE) {
   42.97 -                space_base = base + size;
   42.98 -                return base;
   42.99 -            }
  42.100 +    while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
  42.101 +        if (of_claim(base, size, 0) != OF_FAILURE) {
  42.102 +            space_base = base + size;
  42.103 +            return base;
  42.104          }
  42.105 -        return 0;
  42.106 +        base += (PAGE_SIZE >  align) ? PAGE_SIZE : align;
  42.107      }
  42.108 +    of_panic("Cannot find memory in the RMA\n");
  42.109  }
  42.110  
  42.111  /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
  42.112 @@ -834,9 +860,8 @@ static void boot_of_module(ulong r3, ulo
  42.113      static module_t mods[3];
  42.114      void *oftree;
  42.115      ulong oftree_sz = 48 * PAGE_SIZE;
  42.116 -    char *mod0_start;
  42.117 +    ulong mod0_start;
  42.118      ulong mod0_size;
  42.119 -    ulong mod0;
  42.120      static const char sepr[] = " -- ";
  42.121      extern char dom0_start[] __attribute__ ((weak));
  42.122      extern char dom0_size[] __attribute__ ((weak));
  42.123 @@ -844,60 +869,49 @@ static void boot_of_module(ulong r3, ulo
  42.124  
  42.125      if ((r3 > 0) && (r4 > 0)) {
  42.126          /* was it handed to us in registers ? */
  42.127 -        mod0_start = (void *)r3;
  42.128 +        mod0_start = r3;
  42.129          mod0_size = r4;
  42.130 +            of_printf("%s: Dom0 was loaded and found using r3/r4:"
  42.131 +                      "0x%lx[size 0x%lx]\n",
  42.132 +                      __func__, mod0_start, mod0_size);
  42.133      } else {
  42.134          /* see if it is in the boot params */
  42.135          p = strstr((char *)((ulong)mbi->cmdline), "dom0_start=");
  42.136          if ( p != NULL) {
  42.137              p += 11;
  42.138 -            mod0_start = (char *)simple_strtoul(p, NULL, 0);
  42.139 +            mod0_start = simple_strtoul(p, NULL, 0);
  42.140  
  42.141              p = strstr((char *)((ulong)mbi->cmdline), "dom0_size=");
  42.142              p += 10;
  42.143              mod0_size = simple_strtoul(p, NULL, 0);
  42.144 -
  42.145 -            of_printf("mod0: %o %c %c %c\n",
  42.146 -                      mod0_start[0],
  42.147 -                      mod0_start[1],
  42.148 -                      mod0_start[2],
  42.149 -                      mod0_start[3]);
  42.150 -
  42.151 +            of_printf("%s: Dom0 was loaded and found using cmdline:"
  42.152 +                      "0x%lx[size 0x%lx]\n",
  42.153 +                      __func__, mod0_start, mod0_size);
  42.154          } else if ( ((ulong)dom0_start != 0) && ((ulong)dom0_size != 0) ) {
  42.155              /* was it linked in ? */
  42.156          
  42.157 -            mod0_start = dom0_start;
  42.158 +            mod0_start = (ulong)dom0_start;
  42.159              mod0_size = (ulong)dom0_size;
  42.160 -            of_printf("%s: linked in module copied after _end "
  42.161 -                      "(start 0x%p size 0x%lx)\n",
  42.162 +            of_printf("%s: Dom0 is linked in: 0x%lx[size 0x%lx]\n",
  42.163                        __func__, mod0_start, mod0_size);
  42.164          } else {
  42.165 -            mod0_start = _end;
  42.166 +            mod0_start = (ulong)_end;
  42.167              mod0_size = 0;
  42.168 +            of_printf("%s: FYI Dom0 is unknown, will be caught later\n",
  42.169 +                      __func__);
  42.170          }
  42.171      }
  42.172  
  42.173 -    space_base = (ulong)_end;
  42.174 -    mod0 = find_space(mod0_size, PAGE_SIZE, mbi);
  42.175 +    if (mod0_size > 0) {
  42.176 +        const char *c = (const char *)mod0_start;
  42.177  
  42.178 -    /* three cases
  42.179 -     * 1) mod0_size is not 0 and the image can be copied
  42.180 -     * 2) mod0_size is not 0 and the image cannot be copied
  42.181 -     * 3) mod0_size is 0
  42.182 -     */
  42.183 -    if (mod0_size > 0) {
  42.184 -        if (mod0 != 0) {
  42.185 -            memcpy((void *)mod0, mod0_start, mod0_size);
  42.186 -            mods[0].mod_start = mod0;
  42.187 -            mods[0].mod_end = mod0 + mod0_size;
  42.188 -        } else {
  42.189 -            of_panic("No space to copy mod0\n");
  42.190 -        }
  42.191 -    } else {
  42.192 -        mods[0].mod_start = mod0;
  42.193 -        mods[0].mod_end = mod0;
  42.194 +        of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
  42.195      }
  42.196  
  42.197 +    space_base = (ulong)_end;
  42.198 +    mods[0].mod_start = mod0_start;
  42.199 +    mods[0].mod_end = mod0_start + mod0_size;
  42.200 +
  42.201      of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
  42.202                mods[0].mod_start, mods[0].mod_end);
  42.203      p = strstr((char *)(ulong)mbi->cmdline, sepr);
  42.204 @@ -909,16 +923,23 @@ static void boot_of_module(ulong r3, ulo
  42.205  
  42.206      /* snapshot the tree */
  42.207      oftree = (void*)find_space(oftree_sz, PAGE_SIZE, mbi);
  42.208 -    if (oftree == 0) of_panic("Could not allocate OFD tree\n");
  42.209 +    if (oftree == 0)
  42.210 +        of_panic("Could not allocate OFD tree\n");
  42.211  
  42.212      of_printf("creating oftree\n");
  42.213      of_test("package-to-path");
  42.214 -    ofd_create(oftree, oftree_sz);
  42.215 +    oftree = ofd_create(oftree, oftree_sz);
  42.216      pkg_save(oftree);
  42.217  
  42.218 +    if (ofd_size(oftree) > oftree_sz)
  42.219 +         of_panic("Could not fit all of native devtree\n");
  42.220 +
  42.221      boot_of_fixup_refs(oftree);
  42.222      boot_of_fixup_chosen(oftree);
  42.223  
  42.224 +    if (ofd_size(oftree) > oftree_sz)
  42.225 +         of_panic("Could not fit all devtree fixups\n");
  42.226 +
  42.227      ofd_walk(oftree, OFD_ROOT, /* add_hype_props */ NULL, 2);
  42.228  
  42.229      mods[1].mod_start = (ulong)oftree;
  42.230 @@ -937,7 +958,7 @@ static void boot_of_module(ulong r3, ulo
  42.231  static int __init boot_of_cpus(void)
  42.232  {
  42.233      int cpus;
  42.234 -    int cpu;
  42.235 +    int cpu, bootcpu, logical;
  42.236      int result;
  42.237      u32 cpu_clock[2];
  42.238  
  42.239 @@ -962,10 +983,68 @@ static int __init boot_of_cpus(void)
  42.240      cpu_khz /= 1000;
  42.241      of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz);
  42.242  
  42.243 -    /* FIXME: should not depend on the boot CPU bring the first child */
  42.244 +    /* Look up which CPU we are running on right now.  */
  42.245 +    result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu));
  42.246 +    if (result == OF_FAILURE)
  42.247 +        of_panic("Failed to look up boot cpu\n");
  42.248 +
  42.249      cpu = of_getpeer(cpu);
  42.250 -    while (cpu > 0) {
  42.251 -        of_start_cpu(cpu, (ulong)spin_start, 0);
  42.252 +
  42.253 +    /* We want a continuous logical cpu number space.  */
  42.254 +    cpu_set(0, cpu_present_map);
  42.255 +    cpu_set(0, cpu_online_map);
  42.256 +    cpu_set(0, cpu_possible_map);
  42.257 +
  42.258 +    /* Spin up all CPUS, even if there are more than NR_CPUS, because
  42.259 +     * Open Firmware has them spinning on cache lines which will
  42.260 +     * eventually be scrubbed, which could lead to random CPU activation.
  42.261 +     */
  42.262 +    for (logical = 1; cpu > 0; logical++) {
  42.263 +        unsigned int cpuid, ping, pong;
  42.264 +        unsigned long now, then, timeout;
  42.265 +
  42.266 +        if (cpu == bootcpu) {
  42.267 +            of_printf("skipping boot cpu!\n");
  42.268 +            continue;
  42.269 +        }
  42.270 +
  42.271 +        result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid));
  42.272 +        if (result == OF_FAILURE)
  42.273 +            of_panic("cpuid lookup failed\n");
  42.274 +
  42.275 +        of_printf("spinning up secondary processor #%d: ", logical);
  42.276 +
  42.277 +        __spin_ack = ~0x0;
  42.278 +        ping = __spin_ack;
  42.279 +        pong = __spin_ack;
  42.280 +        of_printf("ping = 0x%x: ", ping);
  42.281 +
  42.282 +        mb();
  42.283 +        result = of_start_cpu(cpu, (ulong)spin_start, logical);
  42.284 +        if (result == OF_FAILURE)
  42.285 +            of_panic("start cpu failed\n");
  42.286 +
  42.287 +        /* We will give the secondary processor five seconds to reply.  */
  42.288 +        then = mftb();
  42.289 +        timeout = then + (5 * timebase_freq);
  42.290 +
  42.291 +        do {
  42.292 +            now = mftb();
  42.293 +            if (now >= timeout) {
  42.294 +                of_printf("BROKEN: ");
  42.295 +                break;
  42.296 +            }
  42.297 +
  42.298 +            mb();
  42.299 +            pong = __spin_ack;
  42.300 +        } while (pong == ping);
  42.301 +        of_printf("pong = 0x%x\n", pong);
  42.302 +
  42.303 +        if (pong != ping) {
  42.304 +            cpu_set(logical, cpu_present_map);
  42.305 +            cpu_set(logical, cpu_possible_map);
  42.306 +        }
  42.307 +
  42.308          cpu = of_getpeer(cpu);
  42.309      }
  42.310      return 1;
  42.311 @@ -1013,6 +1092,7 @@ multiboot_info_t __init *boot_of_init(
  42.312      boot_of_rtas();
  42.313  
  42.314      /* end of OF */
  42.315 +    of_printf("Quiescing Open Firmware ...\n");
  42.316      of_call("quiesce", 0, 0, NULL);
  42.317  
  42.318      return &mbi;
    43.1 --- a/xen/arch/powerpc/dart_u3.c	Fri Sep 01 12:52:12 2006 -0600
    43.2 +++ b/xen/arch/powerpc/dart_u3.c	Fri Sep 01 13:04:02 2006 -0600
    43.3 @@ -55,10 +55,10 @@ static void u3_inv_all(void)
    43.4          dc.reg.dc_invtlb = 1;
    43.5          out_32(dart_ctl_reg, dc.dc_word);
    43.6  
    43.7 -    do {
    43.8 -        dc.dc_word = in_32(dart_ctl_reg);
    43.9 -        r++;
   43.10 -    } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
   43.11 +        do {
   43.12 +            dc.dc_word = in_32(dart_ctl_reg);
   43.13 +            r++;
   43.14 +        } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
   43.15  
   43.16          if (r == (1 << l)) {
   43.17              if (l < 4) {
    44.1 --- a/xen/arch/powerpc/dom0_ops.c	Fri Sep 01 12:52:12 2006 -0600
    44.2 +++ b/xen/arch/powerpc/dom0_ops.c	Fri Sep 01 13:04:02 2006 -0600
    44.3 @@ -23,10 +23,12 @@
    44.4  #include <xen/lib.h>
    44.5  #include <xen/sched.h>
    44.6  #include <xen/guest_access.h>
    44.7 +#include <xen/shadow.h>
    44.8  #include <public/xen.h>
    44.9  #include <public/domctl.h>
   44.10  #include <public/sysctl.h>
   44.11  
   44.12 +void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
   44.13  void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
   44.14  { 
   44.15      memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
   44.16 @@ -34,6 +36,8 @@ void arch_getdomaininfo_ctxt(struct vcpu
   44.17  }
   44.18  
   44.19  long arch_do_domctl(struct xen_domctl *domctl,
   44.20 +                    XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
   44.21 +long arch_do_domctl(struct xen_domctl *domctl,
   44.22                      XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
   44.23  {
   44.24      long ret = 0;
   44.25 @@ -75,6 +79,19 @@ long arch_do_domctl(struct xen_domctl *d
   44.26          }
   44.27      }
   44.28      break;
   44.29 +    case XEN_DOMCTL_shadow_op:
   44.30 +    {
   44.31 +        struct domain *d;
   44.32 +        ret = -ESRCH;
   44.33 +        d = find_domain_by_id(domctl->domain);
   44.34 +        if ( d != NULL )
   44.35 +        {
   44.36 +            ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
   44.37 +            put_domain(d);
   44.38 +            copy_to_guest(u_domctl, domctl, 1);
   44.39 +        } 
   44.40 +    }
   44.41 +    break;
   44.42  
   44.43      default:
   44.44          ret = -ENOSYS;
   44.45 @@ -85,6 +102,8 @@ long arch_do_domctl(struct xen_domctl *d
   44.46  }
   44.47  
   44.48  long arch_do_sysctl(struct xen_sysctl *sysctl,
   44.49 +                    XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
   44.50 +long arch_do_sysctl(struct xen_sysctl *sysctl,
   44.51                      XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
   44.52  {
   44.53      long ret = 0;
   44.54 @@ -109,6 +128,7 @@ long arch_do_sysctl(struct xen_sysctl *s
   44.55      break;
   44.56  
   44.57      default:
   44.58 +        printk("%s: unsupported sysctl: 0x%x\n", __func__, (sysctl->cmd));
   44.59          ret = -ENOSYS;
   44.60          break;
   44.61      }
    45.1 --- a/xen/arch/powerpc/domain.c	Fri Sep 01 12:52:12 2006 -0600
    45.2 +++ b/xen/arch/powerpc/domain.c	Fri Sep 01 13:04:02 2006 -0600
    45.3 @@ -27,6 +27,8 @@
    45.4  #include <xen/domain.h>
    45.5  #include <xen/console.h>
    45.6  #include <xen/shutdown.h>
    45.7 +#include <xen/shadow.h>
    45.8 +#include <xen/mm.h>
    45.9  #include <asm/htab.h>
   45.10  #include <asm/current.h>
   45.11  #include <asm/hcalls.h>
   45.12 @@ -75,7 +77,8 @@ int arch_domain_create(struct domain *d)
   45.13  {
   45.14      unsigned long rma_base;
   45.15      unsigned long rma_sz;
   45.16 -    uint htab_order;
   45.17 +    uint rma_order_pages;
   45.18 +    int rc;
   45.19  
   45.20      if (d->domain_id == IDLE_DOMAIN_ID) {
   45.21          d->shared_info = (void *)alloc_xenheap_page();
   45.22 @@ -84,44 +87,31 @@ int arch_domain_create(struct domain *d)
   45.23          return 0;
   45.24      }
   45.25  
   45.26 -    d->arch.rma_order = cpu_rma_order();
   45.27 -    rma_sz = rma_size(d->arch.rma_order);
   45.28 -
   45.29      /* allocate the real mode area */
   45.30 -    d->max_pages = 1UL << d->arch.rma_order;
   45.31 +    rma_order_pages = cpu_default_rma_order_pages();
   45.32 +    d->max_pages = 1UL << rma_order_pages;
   45.33      d->tot_pages = 0;
   45.34 -    d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0);
   45.35 -    if (NULL == d->arch.rma_page)
   45.36 -        return 1;
   45.37 +
   45.38 +    rc = allocate_rma(d, rma_order_pages);
   45.39 +    if (rc)
   45.40 +        return rc;
   45.41      rma_base = page_to_maddr(d->arch.rma_page);
   45.42 -
   45.43 -    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
   45.44 -
   45.45 -    printk("clearing RMO: 0x%lx[0x%lx]\n", rma_base, rma_sz);
   45.46 -    memset((void *)rma_base, 0, rma_sz);
   45.47 +    rma_sz = rma_size(rma_order_pages);
   45.48  
   45.49      d->shared_info = (shared_info_t *)
   45.50          (rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base);
   45.51  
   45.52 -    d->arch.large_page_sizes = 1;
   45.53 -    d->arch.large_page_shift[0] = 24; /* 16 M for 970s */
   45.54 +    d->arch.large_page_sizes = cpu_large_page_orders(
   45.55 +        d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order));
   45.56  
   45.57 -    /* FIXME: we need to the the maximum addressible memory for this
   45.58 -     * domain to calculate this correctly. It should probably be set
   45.59 -     * by the managment tools */
   45.60 -    htab_order = d->arch.rma_order - 6; /* (1/64) */
   45.61 -    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
   45.62 -        /* bump the htab size of privleged domains */
   45.63 -        ++htab_order;
   45.64 -    }
   45.65 -    htab_alloc(d, htab_order);
   45.66 +    INIT_LIST_HEAD(&d->arch.extent_list);
   45.67  
   45.68      return 0;
   45.69  }
   45.70  
   45.71  void arch_domain_destroy(struct domain *d)
   45.72  {
   45.73 -    htab_free(d);
   45.74 +    shadow_teardown(d);
   45.75  }
   45.76  
   45.77  void machine_halt(void)
   45.78 @@ -163,6 +153,16 @@ int arch_set_info_guest(struct vcpu *v, 
   45.79  { 
   45.80      memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
   45.81  
   45.82 +    printf("Domain[%d].%d: initializing\n",
   45.83 +           v->domain->domain_id, v->vcpu_id);
   45.84 +
   45.85 +    if (v->domain->arch.htab.order == 0)
   45.86 +        panic("Page table never allocated for Domain: %d\n",
   45.87 +              v->domain->domain_id);
   45.88 +    if (v->domain->arch.rma_order == 0)
   45.89 +        panic("RMA never allocated for Domain: %d\n",
   45.90 +              v->domain->domain_id);
   45.91 +
   45.92      set_bit(_VCPUF_initialised, &v->vcpu_flags);
   45.93  
   45.94      cpu_init_vcpu(v);
   45.95 @@ -253,17 +253,19 @@ void context_switch(struct vcpu *prev, s
   45.96  void continue_running(struct vcpu *same)
   45.97  {
   45.98      /* nothing to do */
   45.99 +    return;
  45.100  }
  45.101  
  45.102  void sync_vcpu_execstate(struct vcpu *v)
  45.103  {
  45.104 -    /* XXX for now, for domain destruction, make this non-fatal */
  45.105 -    printf("%s: called\n", __func__);
  45.106 +    /* do nothing */
  45.107 +    return;
  45.108  }
  45.109  
  45.110  void domain_relinquish_resources(struct domain *d)
  45.111  {
  45.112      free_domheap_pages(d->arch.rma_page, d->arch.rma_order);
  45.113 +    free_extents(d);
  45.114  }
  45.115  
  45.116  void arch_dump_domain_info(struct domain *d)
    46.1 --- a/xen/arch/powerpc/domain_build.c	Fri Sep 01 12:52:12 2006 -0600
    46.2 +++ b/xen/arch/powerpc/domain_build.c	Fri Sep 01 13:04:02 2006 -0600
    46.3 @@ -25,6 +25,7 @@
    46.4  #include <xen/init.h>
    46.5  #include <xen/ctype.h>
    46.6  #include <xen/iocap.h>
    46.7 +#include <xen/shadow.h>
    46.8  #include <xen/version.h>
    46.9  #include <asm/processor.h>
   46.10  #include <asm/papr.h>
   46.11 @@ -34,18 +35,22 @@ extern int parseelfimage_32(struct domai
   46.12  extern int loadelfimage_32(struct domain_setup_info *dsi);
   46.13  
   46.14  /* opt_dom0_mem: memory allocated to domain 0. */
   46.15 -static unsigned int opt_dom0_mem;
   46.16 +static unsigned int dom0_nrpages;
   46.17  static void parse_dom0_mem(char *s)
   46.18  {
   46.19 -    unsigned long long bytes = parse_size_and_unit(s);
   46.20 -    /* If no unit is specified we default to kB units, not bytes. */
   46.21 -    if (isdigit(s[strlen(s)-1]))
   46.22 -        opt_dom0_mem = (unsigned int)bytes;
   46.23 -    else
   46.24 -        opt_dom0_mem = (unsigned int)(bytes >> 10);
   46.25 +    unsigned long long bytes;
   46.26 +
   46.27 +    bytes = parse_size_and_unit(s);
   46.28 +    dom0_nrpages = bytes >> PAGE_SHIFT;
   46.29  }
   46.30  custom_param("dom0_mem", parse_dom0_mem);
   46.31  
   46.32 +static unsigned int opt_dom0_max_vcpus;
   46.33 +integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
   46.34 +
   46.35 +static unsigned int opt_dom0_shadow;
   46.36 +boolean_param("dom0_shadow", opt_dom0_shadow);
   46.37 +
   46.38  int elf_sanity_check(Elf_Ehdr *ehdr)
   46.39  {
   46.40      if (IS_ELF(*ehdr))
   46.41 @@ -105,11 +110,13 @@ int construct_dom0(struct domain *d,
   46.42      struct domain_setup_info dsi;
   46.43      ulong dst;
   46.44      u64 *ofh_tree;
   46.45 +    uint rma_nrpages = 1 << d->arch.rma_order;
   46.46      ulong rma_sz = rma_size(d->arch.rma_order);
   46.47      ulong rma = page_to_maddr(d->arch.rma_page);
   46.48      start_info_t *si;
   46.49      ulong eomem;
   46.50      int am64 = 1;
   46.51 +    int preempt = 0;
   46.52      ulong msr;
   46.53      ulong pc;
   46.54      ulong r2;
   46.55 @@ -118,13 +125,18 @@ int construct_dom0(struct domain *d,
   46.56      BUG_ON(d->domain_id != 0);
   46.57      BUG_ON(d->vcpu[0] == NULL);
   46.58  
   46.59 +    if (image_len == 0)
   46.60 +        panic("No Dom0 image supplied\n");
   46.61 +
   46.62      cpu_init_vcpu(v);
   46.63  
   46.64      memset(&dsi, 0, sizeof(struct domain_setup_info));
   46.65      dsi.image_addr = image_start;
   46.66      dsi.image_len  = image_len;
   46.67  
   46.68 +    printk("Trying Dom0 as 64bit ELF\n");
   46.69      if ((rc = parseelfimage(&dsi)) != 0) {
   46.70 +        printk("Trying Dom0 as 32bit ELF\n");
   46.71          if ((rc = parseelfimage_32(&dsi)) != 0)
   46.72              return rc;
   46.73          am64 = 0;
   46.74 @@ -141,7 +153,33 @@ int construct_dom0(struct domain *d,
   46.75  
   46.76      /* By default DOM0 is allocated all available memory. */
   46.77      d->max_pages = ~0U;
   46.78 -    d->tot_pages = 1UL << d->arch.rma_order;
   46.79 +
   46.80 +    /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */
   46.81 +    if (dom0_nrpages == 0) {
   46.82 +        dom0_nrpages = total_pages >> 4;
   46.83 +
   46.84 +        if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES)
   46.85 +            dom0_nrpages = CONFIG_MIN_DOM0_PAGES;
   46.86 +    }
   46.87 +
   46.88 +    /* make sure we are at least as big as the RMA */
   46.89 +    if (dom0_nrpages > rma_nrpages)
   46.90 +        dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages);
   46.91 +
   46.92 +    ASSERT(d->tot_pages == dom0_nrpages);
   46.93 +    ASSERT(d->tot_pages >= rma_nrpages);
   46.94 +
   46.95 +    if (opt_dom0_shadow == 0) {
   46.96 +        /* 1/64 of memory  */
   46.97 +        opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT);
   46.98 +    }
   46.99 +
  46.100 +    do {
  46.101 +        shadow_set_allocation(d, opt_dom0_shadow, &preempt);
  46.102 +    } while (preempt);
  46.103 +    if (shadow_get_allocation(d) == 0)
  46.104 +        panic("shadow allocation failed 0x%x < 0x%x\n",
  46.105 +              shadow_get_allocation(d), opt_dom0_shadow);
  46.106  
  46.107      ASSERT( image_len < rma_sz );
  46.108  
  46.109 @@ -157,10 +195,6 @@ int construct_dom0(struct domain *d,
  46.110  
  46.111      eomem = si->shared_info;
  46.112  
  46.113 -    /* allow dom0 to access all of system RAM */
  46.114 -    d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */
  46.115 -    d->arch.logical_end_pfn = max_page;
  46.116 -
  46.117      /* number of pages accessible */
  46.118      si->nr_pages = rma_sz >> PAGE_SHIFT;
  46.119  
  46.120 @@ -265,7 +299,7 @@ int construct_dom0(struct domain *d,
  46.121  
  46.122      printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2);
  46.123  
  46.124 -    ofd_dom0_fixup(d, *ofh_tree + rma, si, dst - rma);
  46.125 +    ofd_dom0_fixup(d, *ofh_tree + rma, si);
  46.126  
  46.127      set_bit(_VCPUF_initialised, &v->vcpu_flags);
  46.128  
    47.1 --- a/xen/arch/powerpc/exceptions.c	Fri Sep 01 12:52:12 2006 -0600
    47.2 +++ b/xen/arch/powerpc/exceptions.c	Fri Sep 01 13:04:02 2006 -0600
    47.3 @@ -82,6 +82,8 @@ void program_exception(struct cpu_user_r
    47.4      show_registers(regs);
    47.5      printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
    47.6      printk("hid4 0x%016lx\n", regs->hid4);
    47.7 +    printk("---[ backtrace ]---\n");
    47.8 +    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
    47.9      panic("%s: 0x%lx\n", __func__, cookie);
   47.10  #endif /* CRASH_DEBUG */
   47.11  }
    48.1 --- a/xen/arch/powerpc/exceptions.h	Fri Sep 01 12:52:12 2006 -0600
    48.2 +++ b/xen/arch/powerpc/exceptions.h	Fri Sep 01 13:04:02 2006 -0600
    48.3 @@ -51,7 +51,4 @@ extern ulong *__hypercall_table[];
    48.4  extern char exception_vectors[];
    48.5  extern char exception_vectors_end[];
    48.6  extern int spin_start[];
    48.7 -extern int firmware_image_start[0];
    48.8 -extern int firmware_image_size[0];
    48.9 -
   48.10  #endif
    49.1 --- a/xen/arch/powerpc/external.c	Fri Sep 01 12:52:12 2006 -0600
    49.2 +++ b/xen/arch/powerpc/external.c	Fri Sep 01 13:04:02 2006 -0600
    49.3 @@ -175,8 +175,7 @@ void init_IRQ(void)
    49.4  
    49.5  void ack_APIC_irq(void)
    49.6  {
    49.7 -    printk("%s: EOI the whole MPIC?\n", __func__);
    49.8 -    for (;;);
    49.9 +    panic("%s: EOI the whole MPIC?\n", __func__);
   49.10  }
   49.11  
   49.12  void ack_bad_irq(unsigned int irq)
    50.1 --- a/xen/arch/powerpc/htab.c	Fri Sep 01 12:52:12 2006 -0600
    50.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.3 @@ -1,68 +0,0 @@
    50.4 -/*
    50.5 - * This program is free software; you can redistribute it and/or modify
    50.6 - * it under the terms of the GNU General Public License as published by
    50.7 - * the Free Software Foundation; either version 2 of the License, or
    50.8 - * (at your option) any later version.
    50.9 - *
   50.10 - * This program is distributed in the hope that it will be useful,
   50.11 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
   50.12 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   50.13 - * GNU General Public License for more details.
   50.14 - *
   50.15 - * You should have received a copy of the GNU General Public License
   50.16 - * along with this program; if not, write to the Free Software
   50.17 - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   50.18 - *
   50.19 - * Copyright (C) IBM Corp. 2005
   50.20 - *
   50.21 - * Authors: Hollis Blanchard <hollisb@us.ibm.com>
   50.22 - */
   50.23 -
   50.24 -#include <xen/config.h>
   50.25 -#include <xen/sched.h>
   50.26 -
   50.27 -static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
   50.28 -{
   50.29 -    ulong sdr1_htabsize;
   50.30 -
   50.31 -    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
   50.32 -    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
   50.33 -    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
   50.34 -
   50.35 -    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
   50.36 -
   50.37 -    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
   50.38 -}
   50.39 -
   50.40 -void htab_alloc(struct domain *d, uint order)
   50.41 -{
   50.42 -    ulong htab_raddr;
   50.43 -    ulong log_htab_bytes = order + PAGE_SHIFT;
   50.44 -    ulong htab_bytes = 1UL << log_htab_bytes;
   50.45 -
   50.46 -    /* XXX use alloc_domheap_pages instead? */
   50.47 -    htab_raddr = (ulong)alloc_xenheap_pages(order);
   50.48 -    ASSERT(htab_raddr != 0);
   50.49 -    /* XXX check alignment guarantees */
   50.50 -    ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
   50.51 -
   50.52 -    /* XXX slow. move memset out to service partition? */
   50.53 -    memset((void *)htab_raddr, 0, htab_bytes);
   50.54 -
   50.55 -    d->arch.htab.order = order;
   50.56 -    d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
   50.57 -    d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
   50.58 -    d->arch.htab.map = (union pte *)htab_raddr;
   50.59 -    d->arch.htab.shadow = xmalloc_array(ulong,
   50.60 -                                        1UL << d->arch.htab.log_num_ptes);
   50.61 -    ASSERT(d->arch.htab.shadow != NULL);
   50.62 -}
   50.63 -
   50.64 -void htab_free(struct domain *d)
   50.65 -{
   50.66 -    ulong htab_raddr = GET_HTAB(d);
   50.67 -
   50.68 -    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
   50.69 -    xfree(d->arch.htab.shadow);
   50.70 -}
   50.71 -
    51.1 --- a/xen/arch/powerpc/iommu.c	Fri Sep 01 12:52:12 2006 -0600
    51.2 +++ b/xen/arch/powerpc/iommu.c	Fri Sep 01 13:04:02 2006 -0600
    51.3 @@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio
    51.4  
    51.5          pfn = tce.tce_bits.tce_rpn;
    51.6          mfn = pfn2mfn(d, pfn, &mtype);
    51.7 -        if (mtype != 0) {
    51.8 -            panic("we don't do non-RMO memory yet\n");
    51.9 -        }
   51.10 -
   51.11 +        if (mfn > 0) {
   51.12  #ifdef DEBUG
   51.13 -        printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
   51.14 -               ioba, pfn, mfn);
   51.15 +            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
   51.16 +                   ioba, pfn, mfn);
   51.17  #endif
   51.18 -        tce.tce_bits.tce_rpn = mfn;
   51.19 -
   51.20 -        return iommu_phbs[buid].iommu_put(ioba, tce);
   51.21 +            tce.tce_bits.tce_rpn = mfn;
   51.22 +            return iommu_phbs[buid].iommu_put(ioba, tce);
   51.23 +        }
   51.24      }
   51.25      return -1;
   51.26  }
    52.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.2 +++ b/xen/arch/powerpc/memory.c	Fri Sep 01 13:04:02 2006 -0600
    52.3 @@ -0,0 +1,206 @@
    52.4 +/*
    52.5 + * This program is free software; you can redistribute it and/or modify
    52.6 + * it under the terms of the GNU General Public License as published by
    52.7 + * the Free Software Foundation; either version 2 of the License, or
    52.8 + * (at your option) any later version.
    52.9 + *
   52.10 + * This program is distributed in the hope that it will be useful,
   52.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   52.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   52.13 + * GNU General Public License for more details.
   52.14 + *
   52.15 + * You should have received a copy of the GNU General Public License
   52.16 + * along with this program; if not, write to the Free Software
   52.17 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   52.18 + *
   52.19 + * Copyright (C) IBM Corp. 2006
   52.20 + *
   52.21 + * Authors: Dan Poff <poff@us.ibm.com>
   52.22 + *          Jimi Xenidis <jimix@watson.ibm.com>
   52.23 + */
   52.24 +#include <xen/sched.h>
   52.25 +#include <xen/mm.h>
   52.26 +#include "of-devtree.h"
   52.27 +#include "oftree.h"
   52.28 +
   52.29 +unsigned long xenheap_phys_end;
   52.30 +struct membuf {
   52.31 +    ulong start;
   52.32 +    ulong size;
   52.33 +};
   52.34 +
   52.35 +typedef void (*walk_mem_fn)(struct membuf *, uint);
   52.36 +
   52.37 +static ulong free_xenheap(ulong start, ulong end)
   52.38 +{
   52.39 +    start = ALIGN_UP(start, PAGE_SIZE);
   52.40 +    end = ALIGN_DOWN(end, PAGE_SIZE);
   52.41 +
   52.42 +    printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
   52.43 +
   52.44 +    if (oftree <= end && oftree >= start) {
   52.45 +        printk("%s:     Go around the devtree: 0x%lx - 0x%lx\n",
   52.46 +               __func__, oftree, oftree_end);
   52.47 +        init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
   52.48 +        init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
   52.49 +    } else {
   52.50 +        init_xenheap_pages(start, end);
   52.51 +    }
   52.52 +
   52.53 +    return ALIGN_UP(end, PAGE_SIZE);
   52.54 +}
   52.55 +
   52.56 +static void set_max_page(struct membuf *mb, uint entries)
   52.57 +{
   52.58 +    int i;
   52.59 +
   52.60 +    for (i = 0; i < entries; i++) {
   52.61 +        ulong end_page;
   52.62 +
   52.63 +        end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
   52.64 +
   52.65 +        if (end_page > max_page)
   52.66 +            max_page = end_page;
   52.67 +    }
   52.68 +}
   52.69 +
   52.70 +/* mark all memory from modules onward as unused */
   52.71 +static void heap_init(struct membuf *mb, uint entries)
   52.72 +{
   52.73 +    int i;
   52.74 +    ulong start_blk;
   52.75 +    ulong end_blk = 0;
   52.76 +
   52.77 +	for (i = 0; i < entries; i++) {
   52.78 +	    start_blk = mb[i].start;
   52.79 +	    end_blk = start_blk + mb[i].size;
   52.80 +
   52.81 +	    if (start_blk < xenheap_phys_end) {
   52.82 +            if (xenheap_phys_end > end_blk) {
   52.83 +                panic("xenheap spans LMB\n");
   52.84 +            }
   52.85 +            if (xenheap_phys_end == end_blk)
   52.86 +                continue;
   52.87 +
   52.88 +            start_blk = xenheap_phys_end;
   52.89 +        }
   52.90 +
   52.91 +        init_boot_pages(start_blk, end_blk);
   52.92 +        total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
   52.93 +	}
   52.94 +}
   52.95 +
   52.96 +static void ofd_walk_mem(void *m, walk_mem_fn fn)
   52.97 +{
   52.98 +    ofdn_t n;
   52.99 +    uint p_len;
  52.100 +    struct membuf mb[8];
  52.101 +    static char name[] = "memory";
  52.102 +
  52.103 +    n = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", name, sizeof(name));
  52.104 +    while (n > 0) {
  52.105 +
  52.106 +        p_len = ofd_getprop(m, n, "reg", mb, sizeof (mb));
  52.107 +        if (p_len <= 0) {
  52.108 +            panic("ofd_getprop(): failed\n");
  52.109 +        }
  52.110 +        if (p_len > sizeof(mb))
   52.111 +            panic("%s: buffer is not big enough for this firmware: "
  52.112 +                  "0x%lx < 0x%x\n", __func__, sizeof(mb), p_len);
  52.113 +
  52.114 +        fn(mb, p_len / sizeof(mb[0]));
  52.115 +        n = ofd_node_find_next(m, n);
  52.116 +    }
  52.117 +}
  52.118 +
  52.119 +static void setup_xenheap(module_t *mod, int mcount)
  52.120 +{
  52.121 +    int i;
  52.122 +    ulong freemem;
  52.123 +
  52.124 +    freemem = ALIGN_UP((ulong)_end, PAGE_SIZE);
  52.125 +
  52.126 +    for (i = 0; i < mcount; i++) {
  52.127 +        u32 s;
  52.128 +
  52.129 +        if(mod[i].mod_end == mod[i].mod_start)
  52.130 +            continue;
  52.131 +
  52.132 +        s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
  52.133 +
  52.134 +        if (mod[i].mod_start > (ulong)_start &&
  52.135 +            mod[i].mod_start < (ulong)_end) {
  52.136 +            /* mod was linked in */
  52.137 +            continue;
  52.138 +        }
  52.139 +
  52.140 +        if (s < freemem) 
  52.141 +            panic("module addresses must assend\n");
  52.142 +
  52.143 +        free_xenheap(freemem, s);
  52.144 +        freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE);
  52.145 +        
  52.146 +    }
  52.147 +
  52.148 +    /* the rest of the xenheap, starting at the end of modules */
  52.149 +    free_xenheap(freemem, xenheap_phys_end);
  52.150 +}
  52.151 +
/* Discover physical memory from the Open Firmware device tree, set up
 * the boot allocator and frame table, and release all remaining RAM to
 * the xenheap/domheap.  'mod'/'mcount' describe the multiboot modules
 * that must be kept out of the xenheap (see setup_xenheap()). */
void memory_init(module_t *mod, int mcount)
{
    ulong eomem;
    ulong heap_start, heap_size;

    printk("Physical RAM map:\n");

    /* let's find out how much memory there is and set max_page */
    max_page = 0;
    ofd_walk_mem((void *)oftree, set_max_page);
    eomem = max_page << PAGE_SHIFT;

    if (eomem == 0){
        panic("ofd_walk_mem() failed\n");
    }
    printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);

    /* Architecturally the first 4 pages are exception handlers, we
     * will also be copying down some code there */
    heap_start = 4 << PAGE_SHIFT;
    if (oftree < (ulong)_start)
        heap_start = ALIGN_UP(oftree_end, PAGE_SIZE);

    heap_start = init_boot_allocator(heap_start);
    if (heap_start > (ulong)_start) {
        panic("space below _start (%p) is not enough memory "
              "for heap (0x%lx)\n", _start, heap_start);
    }

    /* allow everything else to be allocated */
    total_pages = 0;
    ofd_walk_mem((void *)oftree, heap_init);
    if (total_pages == 0)
        panic("heap_init: failed");

    if (total_pages > max_page)
        panic("total_pages > max_page: 0x%lx > 0x%lx\n",
              total_pages, max_page);

    printk("total_pages: 0x%016lx\n", total_pages);

    init_frametable();
    end_boot_allocator();

    /* Add memory between the beginning of the heap and the beginning
     * of our text */
    free_xenheap(heap_start, (ulong)_start);

    heap_size = xenheap_phys_end - heap_start;
    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);

    setup_xenheap(mod, mcount);

    eomem = avail_domheap_pages();
    printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
           (eomem << PAGE_SHIFT) >> 20,
           (eomem << PAGE_SHIFT) >> 10);
}
    53.1 --- a/xen/arch/powerpc/mm.c	Fri Sep 01 12:52:12 2006 -0600
    53.2 +++ b/xen/arch/powerpc/mm.c	Fri Sep 01 13:04:02 2006 -0600
    53.3 @@ -13,9 +13,10 @@
    53.4   * along with this program; if not, write to the Free Software
    53.5   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    53.6   *
    53.7 - * Copyright (C) IBM Corp. 2005
    53.8 + * Copyright (C) IBM Corp. 2005, 2006
    53.9   *
   53.10   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
   53.11 + *          Jimi Xenidis <jimix@watson.ibm.com>
   53.12   */
   53.13  
   53.14  #include <xen/config.h>
   53.15 @@ -23,10 +24,19 @@
   53.16  #include <xen/shadow.h>
   53.17  #include <xen/kernel.h>
   53.18  #include <xen/sched.h>
   53.19 +#include <xen/perfc.h>
   53.20  #include <asm/misc.h>
   53.21  #include <asm/init.h>
   53.22  #include <asm/page.h>
   53.23  
   53.24 +#ifdef VERBOSE
   53.25 +#define MEM_LOG(_f, _a...)                                  \
   53.26 +  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
   53.27 +         current->domain->domain_id , __LINE__ , ## _a )
   53.28 +#else
   53.29 +#define MEM_LOG(_f, _a...) ((void)0)
   53.30 +#endif
   53.31 +
   53.32  /* Frame table and its size in pages. */
   53.33  struct page_info *frame_table;
   53.34  unsigned long frame_table_size;
   53.35 @@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct 
   53.36      return 1;
   53.37  }
   53.38  
   53.39 -
   53.40 -int get_page_type(struct page_info *page, u32 type)
/* Drop one reference from a page's type-use count.  Lock-free: the
 * type_info word is updated with a cmpxchg loop, retrying whenever a
 * concurrent updater changed it underneath us.  Ported from the x86
 * implementation of the same name. */
void put_page_type(struct page_info *page)
{
    unsigned long nx, x, y = page->u.inuse.type_info;

    do {
        x  = y;
        nx = x - 1;

        /* dropping a reference that was never taken is a caller bug */
        ASSERT((x & PGT_count_mask) != 0);

        /*
         * The page should always be validated while a reference is held. The 
         * exception is during domain destruction, when we forcibly invalidate 
         * page-table pages if we detect a referential loop.
         * See domain.c:relinquish_list().
         */
        ASSERT((x & PGT_validated) || 
               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));

        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            /* Record TLB information for flush later. */
            page->tlbflush_timestamp = tlbflush_current_time();
        }
        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
                           (PGT_pinned | 1)) )
        {
            /* Page is now only pinned. Make the back pointer mutable again. */
            nx |= PGT_va_mutable;
        }
    }
    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
}
   53.76  
   53.77 -void put_page_type(struct page_info *page)
   53.78 +
/* Take one reference on a page as type 'type' (PGT_* value, possibly
 * with a va backpointer in PGT_va_mask).  Returns 1 on success, 0 on
 * failure (count overflow or type conflict).  Lock-free cmpxchg loop,
 * ported from the x86 implementation: the first taker of a type sets
 * it (flushing stale TLB entries on a type change) and then validates;
 * later takers must match the existing type and wait for validation. */
int get_page_type(struct page_info *page, unsigned long type)
{
    unsigned long nx, x, y = page->u.inuse.type_info;

 again:
    do {
        x  = y;
        nx = x + 1;
        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
            return 0;
        }
        else if ( unlikely((x & PGT_count_mask) == 0) )
        {
            /* We are the first referencer: we may (re)assign the type. */
            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
            {
                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
                {
                    /*
                     * On type change we check to flush stale TLB
                     * entries. This may be unnecessary (e.g., page
                     * was GDT/LDT) but those circumstances should be
                     * very rare.
                     */
                    cpumask_t mask =
                        page_get_owner(page)->domain_dirty_cpumask;
                    tlbflush_filter(mask, page->tlbflush_timestamp);

                    if ( unlikely(!cpus_empty(mask)) )
                    {
                        perfc_incrc(need_flush_tlb_flush);
                        flush_tlb_mask(mask);
                    }
                }

                /* We lose existing type, back pointer, and validity. */
                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
                nx |= type;

                /* No special validation needed for writable pages. */
                /* Page tables and GDT/LDT need to be scanned for validity. */
                if ( type == PGT_writable_page )
                    nx |= PGT_validated;
            }
        }
        else
        {
            /* Already typed by someone else: the types must agree. */
            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
            {
                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
                {
                    return 0;
                }
                else if ( (x & PGT_va_mask) == PGT_va_mutable )
                {
                    /* The va backpointer is mutable, hence we update it. */
                    nx &= ~PGT_va_mask;
                    nx |= type; /* we know the actual type is correct */
                }
                else if ( (type & PGT_va_mask) != PGT_va_mutable )
                {
                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));

                    /* This table is possibly mapped at multiple locations. */
                    nx &= ~PGT_va_mask;
                    nx |= PGT_va_unknown;
                }
            }
            if ( unlikely(!(x & PGT_validated)) )
            {
                /* Someone else is updating validation of this page. Wait... */
                while ( (y = page->u.inuse.type_info) == x )
                    cpu_relax();
                goto again;
            }
        }
    }
    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );

    if ( unlikely(!(nx & PGT_validated)) )
    {
        /* Noone else is updating simultaneously. */
        __set_bit(_PGT_validated, &page->u.inuse.type_info);
    }

    return 1;
}
  53.168  
  53.169  void __init init_frametable(void)
  53.170 @@ -107,44 +229,148 @@ extern void copy_page(void *dp, void *sp
  53.171      }
  53.172  }
  53.173  
  53.174 +static int mfn_in_hole(ulong mfn)
  53.175 +{
  53.176 +    /* totally cheating */
  53.177 +    if (mfn >= (0xf0000000UL >> PAGE_SHIFT) &&
  53.178 +        mfn < (((1UL << 32) - 1) >> PAGE_SHIFT))
  53.179 +        return 1;
  53.180 +
  53.181 +    return 0;
  53.182 +}
  53.183 +
  53.184 +static uint add_extent(struct domain *d, struct page_info *pg, uint order)
  53.185 +{
  53.186 +    struct page_extents *pe;
  53.187 +
  53.188 +    pe = xmalloc(struct page_extents);
  53.189 +    if (pe == NULL)
  53.190 +        return 0;
  53.191 +
  53.192 +    pe->pg = pg;
  53.193 +    pe->order = order;
  53.194 +    pe->pfn = page_to_mfn(pg);
  53.195 +
  53.196 +    list_add_tail(&pe->pe_list, &d->arch.extent_list);
  53.197 +
  53.198 +    return pe->pfn;
  53.199 +}
  53.200 +
  53.201 +void free_extents(struct domain *d)
  53.202 +{
  53.203 +    /* we just need to free the memory behind list */
  53.204 +    struct list_head *list;
  53.205 +    struct list_head *ent;
  53.206 +    struct list_head *next;
  53.207 +
  53.208 +    list = &d->arch.extent_list;
  53.209 +    ent = list->next;
  53.210 +
  53.211 +    while (ent != list) {
  53.212 +        next = ent->next;
  53.213 +        xfree(ent);
  53.214 +        ent = next;
  53.215 +    }
  53.216 +}
  53.217 +
  53.218 +uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages)
  53.219 +{
  53.220 +    uint ext_order;
  53.221 +    uint ext_nrpages;
  53.222 +    uint total_nrpages;
  53.223 +    struct page_info *pg;
  53.224 +
  53.225 +    ext_order = cpu_extent_order();
  53.226 +    ext_nrpages = 1 << ext_order;
  53.227 +
  53.228 +    total_nrpages = rma_nrpages;
  53.229 +
  53.230 +    /* We only allocate in nr_extsz chunks so if you are not divisible
  53.231 +     * you get more than you asked for */
  53.232 +    while (total_nrpages < nrpages) {
  53.233 +        pg = alloc_domheap_pages(d, ext_order, 0);
  53.234 +        if (pg == NULL)
  53.235 +            return total_nrpages;
  53.236 +
  53.237 +        if (add_extent(d, pg, ext_order) == 0) {
  53.238 +            free_domheap_pages(pg, ext_order);
  53.239 +            return total_nrpages;
  53.240 +        }
  53.241 +        total_nrpages += ext_nrpages;
  53.242 +    }
  53.243 +
  53.244 +    return total_nrpages;
  53.245 +}
  53.246 +        
/* Allocate and zero the domain's Real Mode Area (2^order_pages pages)
 * from the domheap, recording it in d->arch.rma_page/rma_order.
 * Returns 0 on success, -ENOMEM on allocation failure. */
int allocate_rma(struct domain *d, unsigned int order_pages)
{
    ulong rma_base;
    ulong rma_sz = rma_size(order_pages);

    d->arch.rma_page = alloc_domheap_pages(d, order_pages, 0);
    if (d->arch.rma_page == NULL) {
        DPRINTK("Could not allocate order_pages=%d RMA for domain %u\n",
                order_pages, d->domain_id);
        return -ENOMEM;
    }
    d->arch.rma_order = order_pages;

    rma_base = page_to_maddr(d->arch.rma_page);
    /* the RMA must be naturally aligned to its size */
    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */

    /* XXX: zeroing the whole RMA here can be slow for large orders */
    printk("clearing RMA: 0x%lx[0x%lx]\n", rma_base, rma_sz);
    memset((void *)rma_base, 0, rma_sz);

    return 0;
}
  53.269 +
  53.270  ulong pfn2mfn(struct domain *d, long pfn, int *type)
  53.271  {
  53.272      ulong rma_base_mfn = page_to_mfn(d->arch.rma_page);
  53.273      ulong rma_size_mfn = 1UL << d->arch.rma_order;
  53.274 -    ulong mfn;
  53.275 -    int t;
  53.276 +    struct page_extents *pe;
  53.277  
  53.278      if (pfn < rma_size_mfn) {
  53.279 -        mfn = pfn + rma_base_mfn;
  53.280 -        t = PFN_TYPE_RMA;
  53.281 -    } else if (pfn >= d->arch.logical_base_pfn &&
  53.282 -               pfn < d->arch.logical_end_pfn) {
  53.283 -        if (test_bit(_DOMF_privileged, &d->domain_flags)) {
  53.284 -            /* This hack allows dom0 to map all memory, necessary to
  53.285 -             * initialize domU state. */
  53.286 -            mfn = pfn;
  53.287 -        } else {
  53.288 -            panic("we do not handle the logical area yet\n");
  53.289 -            mfn = 0;
  53.290 -        }
  53.291 +        if (type)
  53.292 +            *type = PFN_TYPE_RMA;
  53.293 +        return pfn + rma_base_mfn;
  53.294 +    }
  53.295  
  53.296 -        t = PFN_TYPE_LOGICAL;
  53.297 -    } else {
  53.298 -        /* don't know */
  53.299 -        mfn = pfn;
  53.300 -        t = PFN_TYPE_IO;
  53.301 +    if (test_bit(_DOMF_privileged, &d->domain_flags) &&
  53.302 +        mfn_in_hole(pfn)) {
  53.303 +        if (type)
  53.304 +            *type = PFN_TYPE_IO;
  53.305 +        return pfn;
  53.306      }
  53.307  
  53.308 -    if (type != NULL)
  53.309 -        *type = t;
  53.310 +    /* quick tests first */
  53.311 +    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
  53.312 +        uint end_pfn = pe->pfn + (1 << pe->order);
  53.313 +
  53.314 +        if (pfn >= pe->pfn && pfn < end_pfn) {
  53.315 +            if (type)
  53.316 +                *type = PFN_TYPE_LOGICAL;
  53.317 +            return page_to_mfn(pe->pg) + (pfn - pe->pfn);
  53.318 +        }
  53.319 +    }
  53.320  
  53.321 -    return mfn;
  53.322 +    /* This hack allows dom0 to map all memory, necessary to
  53.323 +     * initialize domU state. */
  53.324 +    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
  53.325 +        if (type)
  53.326 +            *type = PFN_TYPE_REMOTE;
  53.327 +        return pfn;
  53.328 +    }
  53.329 +
  53.330 +    BUG();
  53.331 +    return 0;
  53.332  }
  53.333  
  53.334  void guest_physmap_add_page(
  53.335      struct domain *d, unsigned long gpfn, unsigned long mfn)
  53.336  {
  53.337 -    panic("%s\n", __func__);
  53.338 +    printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn);
  53.339  }
  53.340  void guest_physmap_remove_page(
  53.341      struct domain *d, unsigned long gpfn, unsigned long mfn)
    54.1 --- a/xen/arch/powerpc/mpic.c	Fri Sep 01 12:52:12 2006 -0600
    54.2 +++ b/xen/arch/powerpc/mpic.c	Fri Sep 01 13:04:02 2006 -0600
    54.3 @@ -498,10 +498,10 @@ static void mpic_enable_irq(unsigned int
    54.4  
    54.5  #ifdef CONFIG_MPIC_BROKEN_U3
    54.6  	if (mpic->flags & MPIC_BROKEN_U3) {
    54.7 -		unsigned int src = irq - mpic->irq_offset;
    54.8 -		if (mpic_is_ht_interrupt(mpic, src) &&
    54.9 +		unsigned int bsrc = irq - mpic->irq_offset;
   54.10 +		if (mpic_is_ht_interrupt(mpic, bsrc) &&
   54.11  		    (irq_desc[irq].status & IRQ_LEVEL))
   54.12 -			mpic_ht_end_irq(mpic, src);
   54.13 +			mpic_ht_end_irq(mpic, bsrc);
   54.14  	}
   54.15  #endif /* CONFIG_MPIC_BROKEN_U3 */
   54.16  }
    55.1 --- a/xen/arch/powerpc/ofd_fixup.c	Fri Sep 01 12:52:12 2006 -0600
    55.2 +++ b/xen/arch/powerpc/ofd_fixup.c	Fri Sep 01 13:04:02 2006 -0600
    55.3 @@ -13,7 +13,7 @@
    55.4   * along with this program; if not, write to the Free Software
    55.5   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    55.6   *
    55.7 - * Copyright (C) IBM Corp. 2005
    55.8 + * Copyright (C) IBM Corp. 2005, 2006
    55.9   *
   55.10   * Authors: Jimi Xenidis <jimix@watson.ibm.com>
   55.11   */
   55.12 @@ -24,6 +24,7 @@
   55.13  #include <xen/version.h>
   55.14  #include <public/xen.h>
   55.15  #include "of-devtree.h"
   55.16 +#include "oftree.h"
   55.17  
   55.18  #undef RTAS
   55.19  
   55.20 @@ -316,91 +317,6 @@ static ofdn_t ofd_rtas_props(void *m)
   55.21  }
   55.22  #endif
   55.23  
   55.24 -struct mem_reg {
   55.25 -    u64 addr;
   55.26 -    u64 sz;
   55.27 -};
   55.28 -
   55.29 -static ofdn_t ofd_memory_chunk_create(void *m, ofdn_t p,
   55.30 -        const char *ppath,
   55.31 -        const char *name,
   55.32 -        const char *dt,
   55.33 -        ulong start, ulong size)
   55.34 -{
   55.35 -    struct mem_reg reg;
   55.36 -    char path[128];
   55.37 -    ulong l;
   55.38 -    u32 v;
   55.39 -    ofdn_t n;
   55.40 -    ulong nl = strlen(name) + 1;
   55.41 -    ulong dtl = strlen(dt) + 1;
   55.42 -
   55.43 -    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
   55.44 -    n = ofd_node_add(m, p, path, l + 1);
   55.45 -    ofd_prop_add(m, n, "name", name, nl);
   55.46 -
   55.47 -    v = 1;
   55.48 -    ofd_prop_add(m, n, "#address-cells", &v, sizeof (v));
   55.49 -    v = 0;
   55.50 -    ofd_prop_add(m, n, "#size-cells", &v, sizeof (v));
   55.51 -
   55.52 -    ofd_prop_add(m, n, "device_type", dt, dtl);
   55.53 -
   55.54 -    /* physical addresses usable without regard to OF */
   55.55 -    reg.addr = start;
   55.56 -    reg.sz = size;
   55.57 -    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
   55.58 -
   55.59 -    return n;
   55.60 -}
   55.61 -
   55.62 -static ofdn_t ofd_memory_props(void *m, struct domain *d, ulong eoload)
   55.63 -{
   55.64 -    ofdn_t n = -1;
   55.65 -    ulong start = 0;
   55.66 -    static char name[] = "memory";
   55.67 -    ulong mem_size = rma_size(d->arch.rma_order);
   55.68 -    ulong chunk_size = rma_size(d->arch.rma_order);
   55.69 -
   55.70 -    /* Remove all old memory props */
   55.71 -    do {
   55.72 -        ofdn_t old;
   55.73 -
   55.74 -        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
   55.75 -                                    name, sizeof(name));
   55.76 -        if (old <= 0) break;
   55.77 -
   55.78 -        ofd_node_prune(m, old);
   55.79 -    } while (1);
   55.80 -
   55.81 -    while (start < mem_size) {
   55.82 -        ulong size = (mem_size < chunk_size) ? mem_size : chunk_size;
   55.83 -
   55.84 -        n = ofd_memory_chunk_create(m, OFD_ROOT, "", "memory", "memory",
   55.85 -                start, size);
   55.86 -
   55.87 -        if (start == 0) {
   55.88 -            /* We are processing the first and RMA chunk */
   55.89 -
   55.90 -            /* free list of physical addresses available after OF and
   55.91 -             * client program have been accounted for */
   55.92 -            struct mem_reg avail[] = {
   55.93 -                /* 0 til OF @ 32MiB - 16KiB stack */
   55.94 -                { .addr = 0, .sz = ((32 << 20) - (16 << 10)) },
   55.95 -                /* end of loaded material to the end the chunk - 1 page */
   55.96 -                { .addr = eoload, .sz = chunk_size - eoload - PAGE_SIZE },
   55.97 -                /* the last page is reserved for xen_start_info */
   55.98 -            };
   55.99 -            ofd_prop_add(m, n, "available", &avail,
  55.100 -                    sizeof (avail));
  55.101 -        }
  55.102 -
  55.103 -        start += size;
  55.104 -        mem_size -= size;
  55.105 -    }
  55.106 -    return n;
  55.107 -}
  55.108 -
  55.109  static ofdn_t ofd_xen_props(void *m, struct domain *d, start_info_t *si)
  55.110  {
  55.111      ofdn_t n;
  55.112 @@ -440,9 +356,8 @@ static ofdn_t ofd_xen_props(void *m, str
  55.113      }
  55.114      return n;
  55.115  }
  55.116 -extern int ofd_dom0_fixup(
  55.117 -    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
  55.118 -int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si, ulong eoload)
  55.119 +
  55.120 +int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si)
  55.121  {
  55.122      void *m;
  55.123      const ofdn_t n = OFD_ROOT;
  55.124 @@ -470,8 +385,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
  55.125      printk("Add /chosen props\n");
  55.126      ofd_chosen_props(m, (char *)si->cmd_line);
  55.127  
  55.128 -    printk("fix /memory@0 props\n");
  55.129 -    ofd_memory_props(m, d, eoload);
  55.130 +    printk("fix /memory props\n");
  55.131 +    ofd_memory_props(m, d);
  55.132  
  55.133      printk("fix /xen props\n");
  55.134      ofd_xen_props(m, d, si);
  55.135 @@ -497,8 +412,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
  55.136      r = ofd_prop_add(m, n, "ibm,partition-no", &did, sizeof(did));
  55.137      ASSERT( r > 0 );
  55.138  
  55.139 -    const char dom0[] = "dom0";
  55.140 -    r = ofd_prop_add(m, n, "ibm,partition-name", dom0, sizeof (dom0));
  55.141 +    const char d0[] = "dom0";
  55.142 +    r = ofd_prop_add(m, n, "ibm,partition-name", d0, sizeof (d0));
  55.143      ASSERT( r > 0 );
  55.144  
  55.145  
    56.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.2 +++ b/xen/arch/powerpc/ofd_fixup_memory.c	Fri Sep 01 13:04:02 2006 -0600
    56.3 @@ -0,0 +1,107 @@
    56.4 +/*
    56.5 + * This program is free software; you can redistribute it and/or modify
    56.6 + * it under the terms of the GNU General Public License as published by
    56.7 + * the Free Software Foundation; either version 2 of the License, or
    56.8 + * (at your option) any later version.
    56.9 + *
   56.10 + * This program is distributed in the hope that it will be useful,
   56.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   56.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   56.13 + * GNU General Public License for more details.
   56.14 + *
   56.15 + * You should have received a copy of the GNU General Public License
   56.16 + * along with this program; if not, write to the Free Software
   56.17 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   56.18 + *
   56.19 + * Copyright (C) IBM Corp. 2006
   56.20 + *
   56.21 + * Authors: Jimi Xenidis <jimix@watson.ibm.com>
   56.22 + */
   56.23 +
   56.24 +#include <xen/config.h>
   56.25 +#include <xen/lib.h>
   56.26 +#include <xen/sched.h>
   56.27 +#include <public/xen.h>
   56.28 +#include "of-devtree.h"
   56.29 +#include "oftree.h"
   56.30 +
   56.31 +static char memory[] = "memory";
   56.32 +
   56.33 +struct mem_reg {
   56.34 +    u64 addr;
   56.35 +    u64 sz;
   56.36 +};
   56.37 +
   56.38 +static void ofd_memory_clean(void *m)
   56.39 +{
   56.40 +    ofdn_t old;
   56.41 +
   56.42 +    /* Remove all old memory props */
   56.43 +    do {
   56.44 +        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
   56.45 +                                    memory, sizeof(memory));
   56.46 +        if (old <= 0)
   56.47 +            break;
   56.48 +
   56.49 +        ofd_node_prune(m, old);
   56.50 +    } while (1);
   56.51 +}
   56.52 +
   56.53 +static ofdn_t ofd_memory_node_create(
   56.54 +    void *m, ofdn_t p, const char *ppath, const char *name,
   56.55 +    const char *dt, ulong start, ulong size)
   56.56 +{
   56.57 +    struct mem_reg reg;
   56.58 +    char path[128];
   56.59 +    ulong l;
   56.60 +    ofdn_t n;
   56.61 +    ulong nl = strlen(name) + 1;
   56.62 +    ulong dtl = strlen(dt) + 1;
   56.63 +
   56.64 +    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
   56.65 +    n = ofd_node_add(m, p, path, l + 1);
   56.66 +    ofd_prop_add(m, n, "name", name, nl);
   56.67 +    ofd_prop_add(m, n, "device_type", dt, dtl);
   56.68 +
   56.69 +    /* physical addresses usable without regard to OF */
   56.70 +    reg.addr = start;
   56.71 +    reg.sz = size;
   56.72 +    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
   56.73 +
   56.74 +    return n;
   56.75 +}
   56.76 +
   56.77 +static void ofd_memory_rma_node(void *m, struct domain *d)
   56.78 +{
   56.79 +    ulong size = rma_size(d->arch.rma_order);
   56.80 +    ofdn_t n;
   56.81 +
   56.82 +    n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, 0, size);
   56.83 +    BUG_ON(n <= 0);
   56.84 +}
   56.85 +
   56.86 +static void ofd_memory_extent_nodes(void *m, struct domain *d)
   56.87 +{
   56.88 +    ulong start;
   56.89 +    ulong size;
   56.90 +    ofdn_t n;
   56.91 +    struct page_extents *pe;
   56.92 +
   56.93 +    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
   56.94 +
   56.95 +        start = pe->pfn << PAGE_SHIFT;
   56.96 +        size = 1UL << (pe->order + PAGE_SHIFT);
   56.97 +
   56.98 +        n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
   56.99 +                                    start, size);
  56.100 +
  56.101 +        BUG_ON(n <= 0);
  56.102 +    }
  56.103 +}
  56.104 +
  56.105 +void ofd_memory_props(void *m, struct domain *d)
  56.106 +{
  56.107 +    ofd_memory_clean(m);
  56.108 +    ofd_memory_rma_node(m, d);
  56.109 +    ofd_memory_extent_nodes(m,d);
  56.110 +}
    57.1 --- a/xen/arch/powerpc/oftree.h	Fri Sep 01 12:52:12 2006 -0600
    57.2 +++ b/xen/arch/powerpc/oftree.h	Fri Sep 01 13:04:02 2006 -0600
    57.3 @@ -20,14 +20,18 @@
    57.4  
    57.5  #ifndef _OFTREE_H
    57.6  #define _OFTREE_H
    57.7 +#include <xen/multiboot.h>
    57.8  
    57.9  extern ulong oftree;
   57.10  extern ulong oftree_len;
   57.11 +extern ulong oftree_end;
   57.12  
   57.13 -extern int ofd_dom0_fixup(
   57.14 -    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
   57.15 +extern int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si);
   57.16 +extern void ofd_memory_props(void *m, struct domain *d);
   57.17  
   57.18  extern int firmware_image_start[0];
   57.19  extern int firmware_image_size[0];
   57.20  
   57.21 +extern void memory_init(module_t *mod, int mcount);
   57.22 +
   57.23  #endif  /* #ifndef _OFTREE_H */
    58.1 --- a/xen/arch/powerpc/papr/tce.c	Fri Sep 01 12:52:12 2006 -0600
    58.2 +++ b/xen/arch/powerpc/papr/tce.c	Fri Sep 01 13:04:02 2006 -0600
    58.3 @@ -47,7 +47,7 @@ static void h_put_tce(struct cpu_user_re
    58.4          regs->gprs[3] = H_Success;
    58.5      }
    58.6  }
    58.7 -    
    58.8 +
    58.9  static void h_get_tce(struct cpu_user_regs *regs)
   58.10  {
   58.11      u32 liobn = regs->gprs[4];
   58.12 @@ -57,7 +57,7 @@ static void h_get_tce(struct cpu_user_re
   58.13      printk("%s: liobn: 0x%x ioba: 0x%lx \n", __func__, liobn, ioba);
   58.14  #endif
   58.15      regs->gprs[3] = H_Function;
   58.16 -    for(;;) ;
   58.17 +    BUG();
   58.18  }
   58.19  
   58.20  static void h_stuff_tce(struct cpu_user_regs *regs)
   58.21 @@ -76,7 +76,7 @@ static void h_stuff_tce(struct cpu_user_
   58.22              count);
   58.23  #endif
   58.24      regs->gprs[3] = H_Function;
   58.25 -    for(;;);
   58.26 +    BUG();
   58.27  }
   58.28     
   58.29  __init_papr_hcall(H_PUT_TCE, h_put_tce);
    59.1 --- a/xen/arch/powerpc/papr/xlate.c	Fri Sep 01 12:52:12 2006 -0600
    59.2 +++ b/xen/arch/powerpc/papr/xlate.c	Fri Sep 01 13:04:02 2006 -0600
    59.3 @@ -30,12 +30,6 @@
    59.4  #include <asm/papr.h>
    59.5  #include <asm/hcalls.h>
    59.6  
    59.7 -static void not_yet(struct cpu_user_regs *regs)
    59.8 -{
    59.9 -    printk("not implemented yet: 0x%lx\n", regs->gprs[3]);
   59.10 -    for (;;);
   59.11 -}
   59.12 -
   59.13  #ifdef USE_PTE_INSERT
   59.14  static inline void pte_insert(union pte volatile *pte,
   59.15          ulong vsid, ulong rpn, ulong lrpn)
   59.16 @@ -160,13 +154,13 @@ static void h_enter(struct cpu_user_regs
   59.17          }
   59.18  
   59.19          /* get correct pgshift value */
   59.20 -        pgshift = d->arch.large_page_shift[lp_size];
   59.21 +        pgshift = d->arch.large_page_order[lp_size] + PAGE_SHIFT;
   59.22      }
   59.23  
   59.24      /* get the correct logical RPN in terms of 4K pages need to mask
   59.25       * off lp bits and unused arpn bits if this is a large page */
   59.26  
   59.27 -    lpn = ~0ULL << (pgshift - 12);
   59.28 +    lpn = ~0ULL << (pgshift - PAGE_SHIFT);
   59.29      lpn = pte.bits.rpn & lpn;
   59.30  
   59.31      rpn = pfn2mfn(d, lpn, &mtype);
   59.32 @@ -493,8 +487,42 @@ static void h_remove(struct cpu_user_reg
   59.33      pte_tlbie(&lpte, ptex);
   59.34  }
   59.35  
   59.36 +static void h_read(struct cpu_user_regs *regs)
   59.37 +{
   59.38 +    ulong flags = regs->gprs[4];
   59.39 +    ulong ptex = regs->gprs[5];
   59.40 +    struct vcpu *v = get_current();
   59.41 +    struct domain *d = v->domain;
   59.42 +    struct domain_htab *htab = &d->arch.htab;
   59.43 +    union pte volatile *pte;
   59.44 +
   59.45 +	if (flags & H_READ_4)
   59.46 +        ptex &= ~0x3UL;
   59.47 +
   59.48 +    if (ptex > (1UL << htab->log_num_ptes)) {
   59.49 +        regs->gprs[3] = H_Parameter;
   59.50 +        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
   59.51 +        return;
   59.52 +    }
   59.53 +    pte = &htab->map[ptex];
   59.54 +    regs->gprs[4] = pte[0].words.vsid;
   59.55 +    regs->gprs[5] = pte[0].words.rpn;
   59.56 +
   59.57 +    if (!(flags & H_READ_4)) {
   59.58 +        /* dump another 3 PTEs */
   59.59 +        regs->gprs[6] = pte[1].words.vsid;
   59.60 +        regs->gprs[7] = pte[1].words.rpn;
   59.61 +        regs->gprs[8] = pte[2].words.vsid;
   59.62 +        regs->gprs[9] = pte[2].words.rpn;
   59.63 +        regs->gprs[10] = pte[3].words.vsid;
   59.64 +        regs->gprs[11] = pte[3].words.rpn;
   59.65 +    }
   59.66 +
   59.67 +    regs->gprs[3] = H_Success;
   59.68 +}
   59.69 +
   59.70  __init_papr_hcall(H_ENTER, h_enter);
   59.71 -__init_papr_hcall(H_READ, not_yet);
   59.72 +__init_papr_hcall(H_READ, h_read);
   59.73  __init_papr_hcall(H_REMOVE, h_remove);
   59.74  __init_papr_hcall(H_CLEAR_MOD, h_clear_mod);
   59.75  __init_papr_hcall(H_CLEAR_REF, h_clear_ref);
    60.1 --- a/xen/arch/powerpc/powerpc64/exceptions.S	Fri Sep 01 12:52:12 2006 -0600
    60.2 +++ b/xen/arch/powerpc/powerpc64/exceptions.S	Fri Sep 01 13:04:02 2006 -0600
    60.3 @@ -514,6 +514,43 @@ 1:
    60.4      mtmsrd r3
    60.5      blr
    60.6  
    60.7 +/* The primary processor issues a firmware call to spin us up at this
    60.8 + * address, passing our CPU number in r3.  We only need a function
    60.9 + * entry point instead of a descriptor since this is never called from
   60.10 + * C code.
   60.11 + */	
   60.12      .globl spin_start
   60.13  spin_start:
   60.14 +    /* Write our processor number as an acknowledgment that we're alive.  */
   60.15 +    LOADADDR(r14, __spin_ack)
   60.16 +    stw r3, 0(r14)
   60.17 +    sync
   60.18 +    /* If NR_CPUS is too small, we should just spin forever.  */
   60.19 +    LOADADDR(r15, NR_CPUS)
   60.20 +    cmpd r3, r15
   60.21 +    blt 2f	
   60.22      b .
   60.23 +    /* Find our index in the array of processor_area struct pointers.  */
   60.24 +2:  LOADADDR(r14, global_cpu_table)
   60.25 +    muli r15, r3, 8
   60.26 +    add r14, r14, r15
   60.27 +    /* Spin until the pointer for our processor goes valid.  */
   60.28 +1:  ld r15, 0(r14)
   60.29 +    cmpldi r15, 0
   60.30 +    beq 1b
   60.31 +    /* Dereference the pointer and load our stack pointer.  */
   60.32 +    isync
   60.33 +    ld r1, PAREA_stack(r15)
   60.34 +    li r14, STACK_FRAME_OVERHEAD
   60.35 +    sub r1, r1, r14
   60.36 +    /* Load up the TOC and entry point for the C function to be called.  */
   60.37 +    LOADADDR(r14, secondary_cpu_init)
   60.38 +    ld r2, 8(r14)
   60.39 +    ld r11, 0(r14)
   60.40 +    mtctr r11
   60.41 +    /* Warning: why do we need this synchronizing instruction on 970FX?  */
   60.42 +    isync
   60.43 +    /* Jump into C code now.  */
   60.44 +    bctrl
   60.45 +    nop
   60.46 +    b .
    61.1 --- a/xen/arch/powerpc/powerpc64/ppc970.c	Fri Sep 01 12:52:12 2006 -0600
    61.2 +++ b/xen/arch/powerpc/powerpc64/ppc970.c	Fri Sep 01 13:04:02 2006 -0600
    61.3 @@ -13,9 +13,10 @@
    61.4   * along with this program; if not, write to the Free Software
    61.5   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    61.6   *
    61.7 - * Copyright (C) IBM Corp. 2005
    61.8 + * Copyright (C) IBM Corp. 2005, 2006
    61.9   *
   61.10   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
   61.11 + *          Jimi Xenidis <jimix@watson.ibm.com>
   61.12   */
   61.13  
   61.14  #include <xen/config.h>
   61.15 @@ -31,26 +32,69 @@
   61.16  
   61.17  #undef SERIALIZE
   61.18  
   61.19 -unsigned int cpu_rma_order(void)
   61.20 +extern volatile struct processor_area * volatile global_cpu_table[];
   61.21 +
   61.22 +struct rma_settings {
   61.23 +    int order;
   61.24 +    int rmlr0;
   61.25 +    int rmlr12;
   61.26 +};
   61.27 +
   61.28 +static struct rma_settings rma_orders[] = {
   61.29 +    { .order = 26, .rmlr0 = 0, .rmlr12 = 3, }, /*  64 MB */
   61.30 +    { .order = 27, .rmlr0 = 1, .rmlr12 = 3, }, /* 128 MB */
   61.31 +    { .order = 28, .rmlr0 = 1, .rmlr12 = 0, }, /* 256 MB */
   61.32 +    { .order = 30, .rmlr0 = 0, .rmlr12 = 2, }, /*   1 GB */
   61.33 +    { .order = 34, .rmlr0 = 0, .rmlr12 = 1, }, /*  16 GB */
   61.34 +    { .order = 38, .rmlr0 = 0, .rmlr12 = 0, }, /* 256 GB */
   61.35 +};
   61.36 +
   61.37 +static uint log_large_page_sizes[] = {
   61.38 +    4 + 20, /* (1 << 4) == 16M */
   61.39 +};
   61.40 +
   61.41 +static struct rma_settings *cpu_find_rma(unsigned int order)
   61.42  {
   61.43 -    /* XXX what about non-HV mode? */
   61.44 -    uint rma_log_size = 6 + 20; /* 64M */
   61.45 -    return rma_log_size - PAGE_SHIFT;
   61.46 +    int i;
   61.47 +    for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
   61.48 +        if (rma_orders[i].order == order)
   61.49 +            return &rma_orders[i];
   61.50 +    }
   61.51 +    return NULL;
   61.52  }
   61.53  
   61.54 -void cpu_initialize(void)
   61.55 +unsigned int cpu_default_rma_order_pages(void)
   61.56 +{
   61.57 +    return rma_orders[0].order - PAGE_SHIFT;
   61.58 +}
   61.59 +
   61.60 +unsigned int cpu_large_page_orders(uint *sizes, uint max)
   61.61  {
   61.62 -    ulong stack;
   61.63 +    uint i = 0;
   61.64 +
   61.65 +    while (i < max && i < ARRAY_SIZE(log_large_page_sizes)) {
   61.66 +        sizes[i] = log_large_page_sizes[i] - PAGE_SHIFT;
   61.67 +        ++i;
   61.68 +    }
   61.69 +
   61.70 +    return i;
   61.71 +}
   61.72  
   61.73 -    parea = xmalloc(struct processor_area);
   61.74 +unsigned int cpu_extent_order(void)
   61.75 +{
   61.76 +    return log_large_page_sizes[0] - PAGE_SHIFT;
   61.77 +}
   61.78 +
   61.79 +void cpu_initialize(int cpuid)
   61.80 +{
   61.81 +    ulong r1, r2;
   61.82 +    __asm__ __volatile__ ("mr %0, 1" : "=r" (r1));
   61.83 +    __asm__ __volatile__ ("mr %0, 2" : "=r" (r2));
   61.84 +
   61.85 +    /* This is SMP safe because the compiler must use r13 for it.  */
   61.86 +    parea = global_cpu_table[cpuid];
   61.87      ASSERT(parea != NULL);
   61.88  
   61.89 -    stack = (ulong)alloc_xenheap_pages(STACK_ORDER);
   61.90 -
   61.91 -    ASSERT(stack != 0);
   61.92 -    parea->hyp_stack_base = (void *)(stack + STACK_SIZE);
   61.93 -    printk("stack is here: %p\n", parea->hyp_stack_base);
   61.94 -
   61.95      mthsprg0((ulong)parea); /* now ready for exceptions */
   61.96  
   61.97      /* Set decrementers for 1 second to keep them out of the way during
   61.98 @@ -79,7 +123,10 @@ void cpu_initialize(void)
   61.99      s |= 1UL << (63-3);     /* ser-gp */
  61.100      hid0.word |= s;
  61.101  #endif
  61.102 -    printk("hid0: 0x%016lx\n", hid0.word);
  61.103 +
  61.104 +    printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n", 
  61.105 +           smp_processor_id(), r1, r2, hid0.word);
  61.106 +
  61.107      mthid0(hid0.word);
  61.108  
  61.109      union hid1 hid1;
  61.110 @@ -115,45 +162,22 @@ void cpu_init_vcpu(struct vcpu *v)
  61.111  {
  61.112      struct domain *d = v->domain;
  61.113      union hid4 hid4;
  61.114 -    ulong rma_base = page_to_maddr(d->arch.rma_page);
  61.115 -    ulong rma_size = rma_size(d->arch.rma_order);
  61.116 +    struct rma_settings *rma_settings;
  61.117  
  61.118      hid4.word = mfhid4();
  61.119  
  61.120      hid4.bits.lpes0 = 0; /* exceptions set MSR_HV=1 */
  61.121      hid4.bits.lpes1 = 1; /* RMA applies */
  61.122  
  61.123 -    hid4.bits.rmor = rma_base >> 26;
  61.124 +    hid4.bits.rmor = page_to_maddr(d->arch.rma_page) >> 26;
  61.125  
  61.126      hid4.bits.lpid01 = d->domain_id & 3;
  61.127      hid4.bits.lpid25 = (d->domain_id >> 2) & 0xf;
  61.128  
  61.129 -    switch (rma_size) {
  61.130 -        case 256ULL << 30:  /* 256 GB */
  61.131 -            hid4.bits.rmlr0 = 0;
  61.132 -            hid4.bits.rmlr12 = 0;
  61.133 -            break;
  61.134 -        case 16ULL << 30:   /* 16 GB */
  61.135 -            hid4.bits.rmlr0 = 0;
  61.136 -            hid4.bits.rmlr12 = 1;
  61.137 -            break;
  61.138 -        case 1ULL << 30:    /* 1 GB */
  61.139 -            hid4.bits.rmlr0 = 0;
  61.140 -            hid4.bits.rmlr12 = 2;
  61.141 -            break;
  61.142 -        case 64ULL << 20:   /* 64 MB */
  61.143 -            hid4.bits.rmlr0 = 0;
  61.144 -            hid4.bits.rmlr12 = 3;
  61.145 -            break;
  61.146 -        case 256ULL << 20:  /* 256 MB */
  61.147 -            hid4.bits.rmlr0 = 1;
  61.148 -            hid4.bits.rmlr12 = 0;
  61.149 -            break;
  61.150 -        case 128ULL << 20:  /* 128 MB */
  61.151 -            hid4.bits.rmlr0 = 1;
  61.152 -            hid4.bits.rmlr12 = 3;
  61.153 -            break;
  61.154 -    }
  61.155 +    rma_settings = cpu_find_rma(d->arch.rma_order + PAGE_SHIFT);
  61.156 +    ASSERT(rma_settings != NULL);
  61.157 +    hid4.bits.rmlr0 = rma_settings->rmlr0;
  61.158 +    hid4.bits.rmlr12 = rma_settings->rmlr12;
  61.159  
  61.160      v->arch.cpu.hid4.word = hid4.word;
  61.161  }
    62.1 --- a/xen/arch/powerpc/setup.c	Fri Sep 01 12:52:12 2006 -0600
    62.2 +++ b/xen/arch/powerpc/setup.c	Fri Sep 01 13:04:02 2006 -0600
    62.3 @@ -43,9 +43,9 @@
    62.4  #include <asm/percpu.h>
    62.5  #include "exceptions.h"
    62.6  #include "of-devtree.h"
    62.7 +#include "oftree.h"
    62.8  
    62.9  #define DEBUG
   62.10 -unsigned long xenheap_phys_end;
   62.11  
   62.12  /* opt_noht: If true, Hyperthreading is ignored. */
   62.13  int opt_noht = 0;
   62.14 @@ -54,6 +54,14 @@ boolean_param("noht", opt_noht);
   62.15  int opt_earlygdb = 0;
   62.16  boolean_param("earlygdb", opt_earlygdb);
   62.17  
   62.18 +/* opt_nosmp: If true, secondary processors are ignored. */
   62.19 +static int opt_nosmp = 0;
   62.20 +boolean_param("nosmp", opt_nosmp);
   62.21 +
   62.22 +/* maxcpus: maximum number of CPUs to activate. */
   62.23 +static unsigned int max_cpus = NR_CPUS;
   62.24 +integer_param("maxcpus", max_cpus);
   62.25 +
   62.26  u32 tlbflush_clock = 1U;
   62.27  DEFINE_PER_CPU(u32, tlbflush_time);
   62.28  
   62.29 @@ -61,9 +69,12 @@ unsigned int watchdog_on;
   62.30  unsigned long wait_init_idle;
   62.31  ulong oftree;
   62.32  ulong oftree_len;
   62.33 +ulong oftree_end;
   62.34  
   62.35  cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
   62.36  cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
   62.37 +cpumask_t cpu_present_map;
   62.38 +cpumask_t cpu_possible_map;
   62.39  
   62.40  /* XXX get this from ISA node in device tree */
   62.41  ulong isa_io_base;
   62.42 @@ -75,6 +86,8 @@ extern void idle_loop(void);
   62.43  /* move us to a header file */
   62.44  extern void initialize_keytable(void);
   62.45  
   62.46 +volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
   62.47 +
   62.48  int is_kernel_text(unsigned long addr)
   62.49  {
   62.50      if (addr >= (unsigned long) &_start &&
   62.51 @@ -169,6 +182,21 @@ static void __init start_of_day(void)
   62.52  
   62.53      percpu_free_unused_areas();
   62.54  
   62.55 +    {
   62.56 +        /* FIXME: Xen assumes that an online CPU is a schedulable
   62.57 +         * CPU, but we just are not there yet. Remove this fragment when
   62.58 +         * scheduling processors actually works. */
   62.59 +        int cpuid;
   62.60 +
   62.61 +        printk("WARNING!: Taking all secondary CPUs offline\n");
   62.62 +
   62.63 +        for_each_online_cpu(cpuid) {
   62.64 +            if (cpuid == 0)
   62.65 +                continue;
   62.66 +            cpu_clear(cpuid, cpu_online_map);
   62.67 +        }
   62.68 +    }
   62.69 +
   62.70      initialize_keytable();
   62.71 +    /* Register another key that will allow for the Hardware Probe
   62.72       * to be contacted, this works with RiscWatch probes and should
   62.73 @@ -193,17 +221,60 @@ void startup_cpu_idle_loop(void)
   62.74      reset_stack_and_jump(idle_loop);
   62.75  }
   62.76  
   62.77 +static void init_parea(int cpuid)
   62.78 +{
   62.79 +    /* Be careful not to shadow the global variable.  */
   62.80 +    volatile struct processor_area *pa;
   62.81 +    void *stack;
   62.82 +
   62.83 +    pa = xmalloc(struct processor_area);
   62.84 +    if (pa == NULL)
   62.85 +        panic("%s: failed to allocate parea for cpu #%d\n", __func__, cpuid);
   62.86 +
   62.87 +    stack = alloc_xenheap_pages(STACK_ORDER);
   62.88 +    if (stack == NULL)
   62.89 +        panic("%s: failed to allocate stack (order %d) for cpu #%d\n", 
   62.90 +              __func__, STACK_ORDER, cpuid);
   62.91 +
   62.92 +    pa->whoami = cpuid;
   62.93 +    pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
   62.94 +
   62.95 +    /* This store has the effect of invoking secondary_cpu_init.  */
   62.96 +    global_cpu_table[cpuid] = pa;
   62.97 +    mb();
   62.98 +}
   62.99 +
  62.100 +static int kick_secondary_cpus(int maxcpus)
  62.101 +{
  62.102 +    int cpuid;
  62.103 +
  62.104 +    for_each_present_cpu(cpuid) {
  62.105 +        if (cpuid == 0)
  62.106 +            continue;
  62.107 +        if (cpuid >= maxcpus)
  62.108 +            break;
  62.109 +        init_parea(cpuid);
  62.110 +        cpu_set(cpuid, cpu_online_map);
  62.111 +        cpu_set(cpuid, cpu_possible_map);
  62.112 +    }
  62.113 +
  62.114 +    return 0;
  62.115 +}
  62.116 +
  62.117 +/* This is the first C code that secondary processors invoke.  */
  62.118 +int secondary_cpu_init(int cpuid, unsigned long r4);
  62.119 +int secondary_cpu_init(int cpuid, unsigned long r4)
  62.120 +{
  62.121 +    cpu_initialize(cpuid);
  62.122 +    while(1);
  62.123 +}
  62.124 +
  62.125  static void __init __start_xen(multiboot_info_t *mbi)
  62.126  {
  62.127      char *cmdline;
  62.128      module_t *mod = (module_t *)((ulong)mbi->mods_addr);
  62.129 -    ulong heap_start;
  62.130 -    ulong modules_start, modules_size;
  62.131 -    ulong eomem = 0;
  62.132 -    ulong heap_size = 0;
  62.133 -    ulong bytes = 0;
  62.134 -    ulong freemem = (ulong)_end;
  62.135 -    ulong oftree_end;
  62.136 +    ulong dom0_start, dom0_len;
  62.137 +    ulong initrd_start, initrd_len;
  62.138  
  62.139      memcpy(0, exception_vectors, exception_vectors_end - exception_vectors);
  62.140      synchronize_caches(0, exception_vectors_end - exception_vectors);
  62.141 @@ -226,6 +297,9 @@ static void __init __start_xen(multiboot
  62.142      console_start_sync();
  62.143  #endif
  62.144  
  62.145 +    /* we give the first RMA to the hypervisor */
  62.146 +    xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
  62.147 +
  62.148      /* Check that we have at least one Multiboot module. */
  62.149      if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
  62.150          panic("FATAL ERROR: Require at least one Multiboot module.\n");
  62.151 @@ -235,10 +309,6 @@ static void __init __start_xen(multiboot
  62.152          panic("FATAL ERROR: Bootloader provided no memory information.\n");
  62.153      }
  62.154  
  62.155 -    /* mark the begining of images */
  62.156 -    modules_start = mod[0].mod_start;
  62.157 -    modules_size = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
  62.158 -
  62.159      /* OF dev tree is the last module */
  62.160      oftree = mod[mbi->mods_count-1].mod_start;
  62.161      oftree_end = mod[mbi->mods_count-1].mod_end;
  62.162 @@ -249,71 +319,7 @@ static void __init __start_xen(multiboot
  62.163      mod[mbi->mods_count-1].mod_end = 0;
  62.164      --mbi->mods_count;
  62.165  
  62.166 -    printk("Physical RAM map:\n");
  62.167 -
  62.168 -    /* lets find out how much memory there is */
  62.169 -    while (bytes < mbi->mmap_length) {
  62.170 -        u64 end;
  62.171 -        u64 addr;
  62.172 -        u64 size;
  62.173 -
  62.174 -        memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr + bytes);
  62.175 -        addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
  62.176 -        size = ((u64)map->length_high << 32) | (u64)map->length_low;
  62.177 -        end = addr + size;
  62.178 -
  62.179 -        printk(" %016lx - %016lx (usable)\n", addr, end);
  62.180 -
  62.181 -        if (addr > eomem) {
  62.182 -            printk("found a hole skipping remainder of memory at:\n"
  62.183 -                   " %016lx and beyond\n", addr);
  62.184 -            break;
  62.185 -        }
  62.186 -        if (end > eomem) {
  62.187 -            eomem = end;
  62.188 -        }
  62.189 -        bytes += map->size + 4;
  62.190 -    }
  62.191 -
  62.192 -    printk("System RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
  62.193 -
  62.194 -    /* top of memory */
  62.195 -    max_page = PFN_DOWN(ALIGN_DOWN(eomem, PAGE_SIZE));
  62.196 -    total_pages = max_page;
  62.197 -
  62.198 -    /* Architecturally the first 4 pages are exception hendlers, we
  62.199 -     * will also be copying down some code there */
  62.200 -    heap_start = init_boot_allocator(4 << PAGE_SHIFT);
  62.201 -
  62.202 -    /* we give the first RMA to the hypervisor */
  62.203 -    xenheap_phys_end = rma_size(cpu_rma_order());
  62.204 -
  62.205 -    /* allow everything else to be allocated */
  62.206 -    init_boot_pages(xenheap_phys_end, eomem);
  62.207 -    init_frametable();
  62.208 -    end_boot_allocator();
  62.209 -
  62.210 -    /* Add memory between the beginning of the heap and the beginning
  62.211 -     * of out text */
  62.212 -    init_xenheap_pages(heap_start, (ulong)_start);
  62.213 -
  62.214 -    /* move the modules to just after _end */
  62.215 -    if (modules_start) {
  62.216 -        printk("modules at: %016lx - %016lx\n", modules_start,
  62.217 -                modules_start + modules_size);
  62.218 -        freemem = ALIGN_UP(freemem, PAGE_SIZE);
  62.219 -        memmove((void *)freemem, (void *)modules_start, modules_size);
  62.220 -
  62.221 -        oftree -= modules_start - freemem;
  62.222 -        modules_start = freemem;
  62.223 -        freemem += modules_size;
  62.224 -        printk("  moved to: %016lx - %016lx\n", modules_start,
  62.225 -                modules_start + modules_size);
  62.226 -    }
  62.227 -
  62.228 -    /* the rest of the xenheap, starting at the end of modules */
  62.229 -    init_xenheap_pages(freemem, xenheap_phys_end);
  62.230 -
  62.231 +    memory_init(mod, mbi->mods_count);
  62.232  
  62.233  #ifdef OF_DEBUG
  62.234      printk("ofdump:\n");
  62.235 @@ -321,13 +327,10 @@ static void __init __start_xen(multiboot
  62.236      ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
  62.237  #endif
  62.238  
  62.239 -    heap_size = xenheap_phys_end - heap_start;
  62.240 -
  62.241 -    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
  62.242 -
  62.243      percpu_init_areas();
  62.244  
  62.245 -    cpu_initialize();
  62.246 +    init_parea(0);
  62.247 +    cpu_initialize(0);
  62.248  
  62.249  #ifdef CONFIG_GDB
  62.250      initialise_gdb();
  62.251 @@ -335,6 +338,14 @@ static void __init __start_xen(multiboot
  62.252          debugger_trap_immediate();
  62.253  #endif
  62.254  
  62.255 +    /* Deal with secondary processors.  */
  62.256 +    if (opt_nosmp) {
  62.257 +        printk("nosmp: leaving secondary processors spinning forever\n");
  62.258 +    } else {
  62.259 +        printk("spinning up at most %d total processors ...\n", max_cpus);
  62.260 +        kick_secondary_cpus(max_cpus);
  62.261 +    }
  62.262 +
  62.263      start_of_day();
  62.264  
  62.265      /* Create initial domain 0. */
  62.266 @@ -353,23 +364,27 @@ static void __init __start_xen(multiboot
  62.267      /* Scrub RAM that is still free and so may go to an unprivileged domain. */
  62.268      scrub_heap_pages();
  62.269  
  62.270 -    /*
  62.271 -     * We're going to setup domain0 using the module(s) that we
  62.272 -     * stashed safely above our heap. The second module, if present,
  62.273 -     * is an initrd ramdisk.  The last module is the OF devtree.
  62.274 -     */
  62.275 -    if (construct_dom0(dom0,
  62.276 -                       modules_start, 
  62.277 -                       mod[0].mod_end-mod[0].mod_start,
  62.278 -                       (mbi->mods_count == 1) ? 0 :
  62.279 -                       modules_start + 
  62.280 -                       (mod[1].mod_start-mod[0].mod_start),
  62.281 -                       (mbi->mods_count == 1) ? 0 :
  62.282 -                       mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
  62.283 +    dom0_start = mod[0].mod_start;
  62.284 +    dom0_len = mod[0].mod_end - mod[0].mod_start;
  62.285 +    if (mbi->mods_count > 1) {
  62.286 +        initrd_start = mod[1].mod_start;
  62.287 +        initrd_len = mod[1].mod_end - mod[1].mod_start;
  62.288 +    } else {
  62.289 +        initrd_start = 0;
  62.290 +        initrd_len = 0;
  62.291 +    }
  62.292 +    if (construct_dom0(dom0, dom0_start, dom0_len,
  62.293 +                       initrd_start, initrd_len,
  62.294                         cmdline) != 0) {
  62.295          panic("Could not set up DOM0 guest OS\n");
  62.296      }
  62.297  
  62.298 +    init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
  62.299 +                 ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
  62.300 +    if (initrd_start)
  62.301 +        init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
  62.302 +                     ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
  62.303 +
  62.304      init_trace_bufs();
  62.305  
  62.306      console_endboot();
  62.307 @@ -408,6 +423,8 @@ void arch_get_xen_caps(xen_capabilities_
  62.308  {
  62.309  }
  62.310  
  62.311 +
  62.312 +
  62.313  /*
  62.314   * Local variables:
  62.315   * mode: C
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/xen/arch/powerpc/shadow.c	Fri Sep 01 13:04:02 2006 -0600
    63.3 @@ -0,0 +1,159 @@
    63.4 +/*
    63.5 + * This program is free software; you can redistribute it and/or modify
    63.6 + * it under the terms of the GNU General Public License as published by
    63.7 + * the Free Software Foundation; either version 2 of the License, or
    63.8 + * (at your option) any later version.
    63.9 + *
   63.10 + * This program is distributed in the hope that it will be useful,
   63.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   63.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   63.13 + * GNU General Public License for more details.
   63.14 + *
   63.15 + * You should have received a copy of the GNU General Public License
   63.16 + * along with this program; if not, write to the Free Software
   63.17 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   63.18 + *
   63.19 + * Copyright (C) IBM Corp. 2006
   63.20 + *
   63.21 + * Authors: Jimi Xenidis <jimix@watson.ibm.com>
   63.22 + */
   63.23 +
   63.24 +#include <xen/config.h>
   63.25 +#include <xen/types.h>
   63.26 +#include <xen/shadow.h>
   63.27 +
   63.28 +static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
   63.29 +{
   63.30 +    ulong sdr1_htabsize;
   63.31 +
   63.32 +    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
   63.33 +    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
   63.34 +    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
   63.35 +
   63.36 +    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
   63.37 +
   63.38 +    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
   63.39 +}
   63.40 +
   63.41 +static ulong htab_alloc(struct domain *d, uint order)
   63.42 +{
   63.43 +    ulong htab_raddr;
   63.44 +    uint log_htab_bytes = order + PAGE_SHIFT;
   63.45 +    uint htab_bytes = 1UL << log_htab_bytes;
   63.46 +
   63.47 +    /* we use xenheap pages to keep domheap pages useful for domains */
   63.48 +
   63.49 +    if (order < 6)
   63.50 +        order = 6;              /* architectural minimum is 2^18 */
   63.51 +    if (order > 34)
   63.52 +        order = 34;             /* architectural maximum is 2^46 */
   63.53 +
   63.54 +    htab_raddr = (ulong)alloc_xenheap_pages(order);
   63.55 +    if (htab_raddr > 0) {
   63.56 +        ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
   63.57 +
   63.58 +        d->arch.htab.order = order;
   63.59 +        d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
   63.60 +        d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
   63.61 +        d->arch.htab.map = (union pte *)htab_raddr;
   63.62 +    }
   63.63 +    return htab_raddr;
   63.64 +}
   63.65 +
   63.66 +static void htab_free(struct domain *d)
   63.67 +{
   63.68 +    ulong htab_raddr = GET_HTAB(d);
   63.69 +
   63.70 +    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
   63.71 +}
   63.72 +
   63.73 +
   63.74 +unsigned int shadow_teardown(struct domain *d)
   63.75 +{
   63.76 +    htab_free(d);
   63.77 +    return 0;
   63.78 +}
   63.79 +
   63.80 +unsigned int shadow_set_allocation(struct domain *d, 
   63.81 +                                    unsigned int megabytes,
   63.82 +                                    int *preempted)
   63.83 +{
   63.84 +    unsigned int rc;
   63.85 +    uint pages;
   63.86 +    uint p;
   63.87 +    uint order;
   63.88 +    ulong addr;
   63.89 +    
   63.90 +
   63.91 +    if (d->arch.htab.order)
   63.92 +        return -EBUSY;
   63.93 +
   63.94 +    if (megabytes == 0) {
   63.95 +        /* old management tools */
   63.96 +        megabytes = 1;          /* 1/64th of 64M */
   63.97 +        printk("%s: Fix management tools to set and get shadow/htab values\n"
   63.98 +               "    using %d MiB htab\n",
   63.99 +               __func__, megabytes);
  63.100 +    }
  63.101 +    pages = megabytes << (20 - PAGE_SHIFT);
  63.102 +    order = fls(pages) - 1;     /* log2 truncated */
  63.103 +    if (pages & ((1 << order) - 1))
  63.104 +        ++order;                /* round up */
  63.105 +
  63.106 +    addr = htab_alloc(d, order);
  63.107 +
  63.108 +    printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
  63.109 +           d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
  63.110 +
  63.111 +    if (addr == 0)
  63.112 +        return -ENOMEM;
  63.113 +
  63.114 +    /* XXX make this a continuation */
  63.115 +    for (p = 0; p < (1 << order); p++)
  63.116 +        clear_page((void *)(addr + (p << PAGE_SHIFT)));
  63.117 +
  63.118 +    return rc;
  63.119 +}
  63.120 +
  63.121 +int shadow_domctl(struct domain *d, 
  63.122 +				  xen_domctl_shadow_op_t *sc,
  63.123 +				  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
  63.124 +{
  63.125 +    if ( unlikely(d == current->domain) )
  63.126 +    {
  63.127 +        DPRINTK("Don't try to do a shadow op on yourself!\n");
  63.128 +        return -EINVAL;
  63.129 +    }
  63.130 +
  63.131 +    switch ( sc->op )
  63.132 +    {
  63.133 +    case XEN_DOMCTL_SHADOW_OP_OFF:
  63.134 +         DPRINTK("Shadow is mandatory!\n");
  63.135 +         return -EINVAL;
  63.136 +
  63.137 +    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
  63.138 +        sc->mb = shadow_get_allocation(d);
  63.139 +        return 0;
  63.140 +
  63.141 +    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: {
  63.142 +        int rc;
  63.143 +        int preempted = 0;
  63.144 +
  63.145 +        rc = shadow_set_allocation(d, sc->mb, &preempted);
  63.146 +
  63.147 +        if (preempted)
  63.148 +            /* Not finished.  Set up to re-run the call. */
  63.149 +            rc = hypercall_create_continuation(
  63.150 +                __HYPERVISOR_domctl, "h", u_domctl);
  63.151 +        else 
  63.152 +            /* Finished.  Return the new allocation */
  63.153 +            sc->mb = shadow_get_allocation(d);
  63.154 +        return rc;
  63.155 +    }
  63.156 +
  63.157 +    default:
  63.158 +        printk("Bad shadow op %u\n", sc->op);
  63.159 +        BUG();
  63.160 +        return -EINVAL;
  63.161 +    }
  63.162 +}
    64.1 --- a/xen/arch/powerpc/xen.lds.S	Fri Sep 01 12:52:12 2006 -0600
    64.2 +++ b/xen/arch/powerpc/xen.lds.S	Fri Sep 01 13:04:02 2006 -0600
    64.3 @@ -10,11 +10,15 @@ ENTRY(_start)
    64.4  SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");
    64.5  /* Do we need any of these for elf?
    64.6     __DYNAMIC = 0;    */
    64.7 +PHDRS
    64.8 +{
    64.9 +  text PT_LOAD FILEHDR PHDRS;
   64.10 +}   
   64.11  SECTIONS
   64.12  {
   64.13    /* Read-only sections, merged into text segment: */
   64.14    PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
   64.15 -  .interp         : { *(.interp) }
   64.16 +  .interp         : { *(.interp) } :text
   64.17    .hash           : { *(.hash) }
   64.18    .dynsym         : { *(.dynsym) }
   64.19    .dynstr         : { *(.dynstr) }
   64.20 @@ -103,7 +107,7 @@ SECTIONS
   64.21    PROVIDE (__fini_array_end = .);
   64.22    .data           :
   64.23    {
   64.24 -    *(.data .data.* .gnu.linkonce.d.*)
   64.25 +    *(.data .gnu.linkonce.d.*)
   64.26      SORT(CONSTRUCTORS)
   64.27    }
   64.28  
   64.29 @@ -121,7 +125,7 @@ SECTIONS
   64.30    __inithcall_end = .;
   64.31  
   64.32    __per_cpu_start = .;
   64.33 -  .data.percpu : { *(.data.percpu) } :text
   64.34 +  .data.percpu : { *(.data.percpu) }
   64.35    __per_cpu_data_end = .;
   64.36    . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
   64.37    . = ALIGN(STACK_SIZE);
    65.1 --- a/xen/arch/x86/hvm/io.c	Fri Sep 01 12:52:12 2006 -0600
    65.2 +++ b/xen/arch/x86/hvm/io.c	Fri Sep 01 13:04:02 2006 -0600
    65.3 @@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u
    65.4          break;
    65.5  
    65.6      case INSTR_BT:
    65.7 -        index = operand_index(src);
    65.8 -        value = get_reg_value(size, index, 0, regs);
    65.9 -
   65.10 +        if ( src & REGISTER )
   65.11 +        {
   65.12 +            index = operand_index(src);
   65.13 +            value = get_reg_value(size, index, 0, regs);
   65.14 +        }
   65.15 +        else if ( src & IMMEDIATE )
   65.16 +            value = mmio_opp->immediate;
   65.17          if (p->u.data & (1 << (value & ((1 << 5) - 1))))
   65.18              regs->eflags |= X86_EFLAGS_CF;
   65.19          else
    66.1 --- a/xen/arch/x86/hvm/platform.c	Fri Sep 01 12:52:12 2006 -0600
    66.2 +++ b/xen/arch/x86/hvm/platform.c	Fri Sep 01 13:04:02 2006 -0600
    66.3 @@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi
    66.4          instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
    66.5          return DECODE_success;
    66.6  
    66.7 +    case 0xBA:
    66.8 +        if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */
    66.9 +        {
   66.10 +            instr->instr = INSTR_BT;
   66.11 +            GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
   66.12 +            instr->immediate =
   66.13 +                    (signed char)get_immediate(realmode, opcode+1, BYTE);
   66.14 +            instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
   66.15 +            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
   66.16 +            return DECODE_success;
   66.17 +        }
   66.18 +        else
   66.19 +        {
   66.20 +            printf("0f %x, This opcode subtype isn't handled yet\n", *opcode);
   66.21 +            return DECODE_failure;
   66.22 +        }
   66.23 +
   66.24      default:
   66.25          printf("0f %x, This opcode isn't handled yet\n", *opcode);
   66.26          return DECODE_failure;
   66.27 @@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig
   66.28              mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
   66.29              mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
   66.30  
   66.31 -            index = operand_index(mmio_inst.operand[0]);
   66.32 -            size = operand_size(mmio_inst.operand[0]);
   66.33 -            value = get_reg_value(size, index, 0, regs);
   66.34 -
   66.35 +            if ( mmio_inst.operand[0] & REGISTER )
   66.36 +            { 
   66.37 +                index = operand_index(mmio_inst.operand[0]);
   66.38 +                size = operand_size(mmio_inst.operand[0]);
   66.39 +                value = get_reg_value(size, index, 0, regs);
   66.40 +            }
   66.41 +            else if ( mmio_inst.operand[0] & IMMEDIATE )
   66.42 +            {
   66.43 +                mmio_opp->immediate = mmio_inst.immediate;
   66.44 +                value = mmio_inst.immediate;
   66.45 +            } 
   66.46              send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
   66.47                            mmio_inst.op_size, 0, IOREQ_READ, 0);
   66.48              break;
    67.1 --- a/xen/arch/x86/hvm/svm/intr.c	Fri Sep 01 12:52:12 2006 -0600
    67.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Fri Sep 01 13:04:02 2006 -0600
    67.3 @@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void)
    67.4      ASSERT(vmcb);
    67.5  
    67.6      /* Check if an Injection is active */
    67.7 -       /* Previous Interrupt delivery caused this Intercept? */
    67.8 -       if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
    67.9 -           v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
   67.10 +    /* Previous Interrupt delivery caused this Intercept? */
   67.11 +    if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
   67.12 +        v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
   67.13  //           printk("Injecting PF#: saving IRQ from ExitInfo\n");
   67.14 -           vmcb->exitintinfo.bytes = 0;
   67.15 -           re_injecting = 1;
   67.16 -       }
   67.17 +        vmcb->exitintinfo.bytes = 0;
   67.18 +        re_injecting = 1;
   67.19 +    }
   67.20  
   67.21      /* Guest's interrputs masked? */
   67.22      rflags = vmcb->rflags;
   67.23      if (irq_masked(rflags)) {
   67.24          HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags);
   67.25 -       /* bail out, we won't be injecting an interrupt this time */
   67.26 -       return;
   67.27 +        /* bail out, we won't be injecting an interrupt this time */
   67.28 +        return;
   67.29      }
   67.30 -  
   67.31 +    
   67.32      /* Previous interrupt still pending? */
   67.33      if (vmcb->vintr.fields.irq) {
   67.34  //        printk("Re-injecting IRQ from Vintr\n");
   67.35 @@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void)
   67.36        if ( v->vcpu_id == 0 )
   67.37           hvm_pic_assist(v);
   67.38  
   67.39 -      callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ];
   67.40  
   67.41 -      /* Before we deal with PIT interrupts, let's check for
   67.42 -         interrupts set by the device model or paravirtualised event
   67.43 -         channel interrupts.
   67.44 -      */
   67.45 -      if ( cpu_has_pending_irq(v) ) {
   67.46 -           intr_vector = cpu_get_interrupt(v, &intr_type);
   67.47 +      if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
   67.48 +          pic_set_irq(pic, pt->irq, 0);
   67.49 +          pic_set_irq(pic, pt->irq, 1);
   67.50        }
   67.51 -      else  if ( callback_irq != 0 && local_events_need_delivery() ) {
   67.52 +
   67.53 +      callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ];
   67.54 +      if ( callback_irq != 0 &&
   67.55 +           local_events_need_delivery() ) {
   67.56            /*inject para-device call back irq*/
   67.57            v->vcpu_info->evtchn_upcall_mask = 1;
   67.58            pic_set_irq(pic, callback_irq, 0);
   67.59            pic_set_irq(pic, callback_irq, 1);
   67.60 -          intr_vector = callback_irq;
   67.61        }
   67.62 -      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
   67.63 -          pic_set_irq(pic, pt->irq, 0);
   67.64 -          pic_set_irq(pic, pt->irq, 1);
   67.65 +
   67.66 +      if ( cpu_has_pending_irq(v) )
   67.67            intr_vector = cpu_get_interrupt(v, &intr_type);
   67.68 -      }
   67.69 +
   67.70      }
   67.71  
   67.72      /* have we got an interrupt to inject? */
    68.1 --- a/xen/arch/x86/hvm/svm/svm.c	Fri Sep 01 12:52:12 2006 -0600
    68.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Sep 01 13:04:02 2006 -0600
    68.3 @@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs(
    68.4      {
    68.5          /* Returning the guest's regs */
    68.6          crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
    68.7 +        crs[2] = v->arch.hvm_svm.cpu_cr2;
    68.8          crs[3] = v->arch.hvm_svm.cpu_cr3;
    68.9          crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
   68.10      }
   68.11 @@ -2793,10 +2794,8 @@ asmlinkage void svm_vmexit_handler(struc
   68.12          break;
   68.13  
   68.14      case VMEXIT_INTR:
   68.15 -        raise_softirq(SCHEDULE_SOFTIRQ);
   68.16          break;
   68.17  
   68.18 -
   68.19      case VMEXIT_INVD:
   68.20          svm_vmexit_do_invd(vmcb);
   68.21          break;
    69.1 --- a/xen/arch/x86/hvm/vlapic.c	Fri Sep 01 12:52:12 2006 -0600
    69.2 +++ b/xen/arch/x86/hvm/vlapic.c	Fri Sep 01 13:04:02 2006 -0600
    69.3 @@ -919,6 +919,20 @@ int cpu_has_apic_interrupt(struct vcpu* 
    69.4      return 0;
    69.5  }
    69.6  
    69.7 +/* check to see if there is pending interrupt  */
    69.8 +int cpu_has_pending_irq(struct vcpu *v)
    69.9 +{
   69.10 +    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
   69.11 +
   69.12 +    /* APIC */
   69.13 +    if ( cpu_has_apic_interrupt(v) ) return 1;
   69.14 +    
   69.15 +    /* PIC */
   69.16 +    if ( !vlapic_accept_pic_intr(v) ) return 0;
   69.17 +
   69.18 +    return plat->interrupt_request;
   69.19 +}
   69.20 +
   69.21  void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode)
   69.22  {
   69.23      struct vlapic *vlapic = VLAPIC(v);
    70.1 --- a/xen/arch/x86/hvm/vmx/io.c	Fri Sep 01 12:52:12 2006 -0600
    70.2 +++ b/xen/arch/x86/hvm/vmx/io.c	Fri Sep 01 13:04:02 2006 -0600
    70.3 @@ -68,19 +68,6 @@ static inline int is_interruptibility_st
    70.4      return interruptibility;
    70.5  }
    70.6  
    70.7 -/* check to see if there is pending interrupt  */
    70.8 -int cpu_has_pending_irq(struct vcpu *v)
    70.9 -{
   70.10 -    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
   70.11 -
   70.12 -    /* APIC */
   70.13 -    if ( cpu_has_apic_interrupt(v) ) return 1;
   70.14 -    
   70.15 -    /* PIC */
   70.16 -    if ( !vlapic_accept_pic_intr(v) ) return 0;
   70.17 -
   70.18 -    return plat->interrupt_request;
   70.19 -}
   70.20  
   70.21  asmlinkage void vmx_intr_assist(void)
   70.22  {
    71.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 01 12:52:12 2006 -0600
    71.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 01 13:04:02 2006 -0600
    71.3 @@ -46,6 +46,8 @@
    71.4  #include <asm/hvm/vpic.h>
    71.5  #include <asm/hvm/vlapic.h>
    71.6  
    71.7 +extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
    71.8 +
    71.9  static DEFINE_PER_CPU(unsigned long, trace_values[5]);
   71.10  #define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value
   71.11  
   71.12 @@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs(
   71.13      if ( crs != NULL )
   71.14      {
   71.15          __vmread(CR0_READ_SHADOW, &crs[0]);
   71.16 +        crs[2] = v->arch.hvm_vmx.cpu_cr2;
   71.17          __vmread(GUEST_CR3, &crs[3]);
   71.18          __vmread(CR4_READ_SHADOW, &crs[4]);
   71.19      }
   71.20 @@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c
   71.21                       bitmaskof(X86_FEATURE_MWAIT) );
   71.22  
   71.23              edx &= ~( bitmaskof(X86_FEATURE_HT)   |
   71.24 -                     bitmaskof(X86_FEATURE_MCA)   |
   71.25 -                     bitmaskof(X86_FEATURE_MCE)   |
   71.26                       bitmaskof(X86_FEATURE_ACPI)  |
   71.27                       bitmaskof(X86_FEATURE_ACC) );
   71.28          }
   71.29 @@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str
   71.30      unsigned long value;
   71.31      unsigned long old_cr;
   71.32      struct vcpu *v = current;
   71.33 +    struct vlapic *vlapic = VLAPIC(v);
   71.34  
   71.35      switch ( gp ) {
   71.36      CASE_GET_REG(EAX, eax);
   71.37 @@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str
   71.38              shadow_update_paging_modes(v);
   71.39          break;
   71.40      }
   71.41 +    case 8:
   71.42 +    {
   71.43 +        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
   71.44 +        vlapic_update_ppr(vlapic);
   71.45 +        break;
   71.46 +    }
   71.47      default:
   71.48          printk("invalid cr: %d\n", gp);
   71.49          __hvm_bug(regs);
   71.50 @@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str
   71.51   */
   71.52  static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
   71.53  {
   71.54 -    unsigned long value;
   71.55 +    unsigned long value = 0;
   71.56      struct vcpu *v = current;
   71.57 +    struct vlapic *vlapic = VLAPIC(v);
   71.58  
   71.59 -    if ( cr != 3 )
   71.60 +    if ( cr != 3 && cr != 8)
   71.61          __hvm_bug(regs);
   71.62  
   71.63 -    value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
   71.64 +    if ( cr == 3 )
   71.65 +        value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
   71.66 +    else if ( cr == 8 )
   71.67 +    {
   71.68 +        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
   71.69 +        value = (value & 0xF0) >> 4;
   71.70 +    }
   71.71  
   71.72      switch ( gp ) {
   71.73      CASE_SET_REG(EAX, eax);
   71.74 @@ -1888,7 +1903,7 @@ static inline void vmx_do_msr_read(struc
   71.75          }
   71.76  
   71.77          rdmsr_safe(regs->ecx, regs->eax, regs->edx);
   71.78 -        break;
   71.79 +        return;
   71.80      }
   71.81  
   71.82      regs->eax = msr_content & 0xFFFFFFFF;
    72.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Sep 01 12:52:12 2006 -0600
    72.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Sep 01 13:04:02 2006 -0600
    72.3 @@ -2861,11 +2861,11 @@ static int sh_page_fault(struct vcpu *v,
    72.4      //      bunch of 4K maps.
    72.5      //
    72.6  
    72.7 +    shadow_lock(d);
    72.8 +
    72.9      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
   72.10                     v->domain->domain_id, v->vcpu_id, va, regs->error_code);
   72.11      
   72.12 -    shadow_lock(d);
   72.13 -
   72.14      shadow_audit_tables(v);
   72.15                     
   72.16      if ( guest_walk_tables(v, va, &gw, 1) != 0 )
   72.17 @@ -3291,12 +3291,6 @@ sh_update_linear_entries(struct vcpu *v)
   72.18          {
   72.19              ml3e = __linear_l3_table;
   72.20              l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
   72.21 -#if GUEST_PAGING_LEVELS == 2
   72.22 -            /* Shadow l3 tables are made up by update_cr3 */
   72.23 -            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   72.24 -#else
   72.25 -            sl3e = v->arch.shadow_vtable;
   72.26 -#endif
   72.27          }
   72.28          else 
   72.29          {   
   72.30 @@ -3306,13 +3300,15 @@ sh_update_linear_entries(struct vcpu *v)
   72.31              l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
   72.32              ml3e = sh_map_domain_page(l3mfn);
   72.33              sh_unmap_domain_page(ml4e);
   72.34 +        }
   72.35 +
   72.36  #if GUEST_PAGING_LEVELS == 2
   72.37 -            /* Shadow l3 tables are made up by update_cr3 */
   72.38 -            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   72.39 +        /* Shadow l3 tables are made up by update_cr3 */
   72.40 +        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
   72.41  #else
   72.42 -            sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
   72.43 +        /* Always safe to use shadow_vtable, because it's globally mapped */
   72.44 +        sl3e = v->arch.shadow_vtable;
   72.45  #endif
   72.46 -        }
   72.47  
   72.48          for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
   72.49          {
   72.50 @@ -3324,12 +3320,7 @@ sh_update_linear_entries(struct vcpu *v)
   72.51          }
   72.52  
   72.53          if ( v != current ) 
   72.54 -        {
   72.55              sh_unmap_domain_page(ml3e);
   72.56 -#if GUEST_PAGING_LEVELS != 2
   72.57 -            sh_unmap_domain_page(sl3e);
   72.58 -#endif
   72.59 -        }
   72.60      }
   72.61  
   72.62  #elif CONFIG_PAGING_LEVELS == 3
   72.63 @@ -3361,31 +3352,10 @@ sh_update_linear_entries(struct vcpu *v)
   72.64          
   72.65  #else /* GUEST_PAGING_LEVELS == 3 */
   72.66          
   72.67 -        /* Use local vcpu's mappings if we can; otherwise make new mappings */
   72.68 -        if ( v == current ) 
   72.69 -        {
   72.70 -            shadow_l3e = v->arch.shadow_vtable;
   72.71 -            if ( !shadow_mode_external(d) )
   72.72 -                guest_l3e = v->arch.guest_vtable;
   72.73 -        }
   72.74 -        else 
   72.75 -        {
   72.76 -            mfn_t smfn;
   72.77 -            int idx;
   72.78 -            
   72.79 -            /* Map the shadow l3 */
   72.80 -            smfn = pagetable_get_mfn(v->arch.shadow_table);
   72.81 -            idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
   72.82 -            shadow_l3e = sh_map_domain_page(smfn);
   72.83 -            shadow_l3e += idx;
   72.84 -            if ( !shadow_mode_external(d) )
   72.85 -            {
   72.86 -                /* Also the guest l3 */
   72.87 -                mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); 
   72.88 -                guest_l3e = sh_map_domain_page(gmfn);
   72.89 -                guest_l3e += guest_index(v->arch.guest_vtable);
   72.90 -            }
   72.91 -        }
   72.92 +        /* Always safe to use *_vtable, because they're globally mapped */
   72.93 +        shadow_l3e = v->arch.shadow_vtable;
   72.94 +        guest_l3e = v->arch.guest_vtable;
   72.95 +
   72.96  #endif /* GUEST_PAGING_LEVELS */
   72.97          
   72.98          /* Choose where to write the entries, using linear maps if possible */
   72.99 @@ -3443,14 +3413,6 @@ sh_update_linear_entries(struct vcpu *v)
  72.100          if ( v != current || !shadow_mode_external(d) )
  72.101              sh_unmap_domain_page(l2e);
  72.102          
  72.103 -#if GUEST_PAGING_LEVELS == 3
  72.104 -        if ( v != current) 
  72.105 -        {
  72.106 -            sh_unmap_domain_page(shadow_l3e);
  72.107 -            if ( !shadow_mode_external(d) )
  72.108 -                sh_unmap_domain_page(guest_l3e);
  72.109 -        }
  72.110 -#endif
  72.111      }
  72.112  
  72.113  #elif CONFIG_PAGING_LEVELS == 2
  72.114 @@ -3601,7 +3563,7 @@ sh_detach_old_tables(struct vcpu *v)
  72.115           v->arch.shadow_vtable )
  72.116      {
  72.117          // Q: why does this need to use (un)map_domain_page_*global* ?
  72.118 -        //
  72.119 +        /* A: so sh_update_linear_entries can operate on other vcpus */
  72.120          sh_unmap_domain_page_global(v->arch.shadow_vtable);
  72.121          v->arch.shadow_vtable = NULL;
  72.122      }
    73.1 --- a/xen/arch/x86/physdev.c	Fri Sep 01 12:52:12 2006 -0600
    73.2 +++ b/xen/arch/x86/physdev.c	Fri Sep 01 13:04:02 2006 -0600
    73.3 @@ -96,10 +96,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
    73.4          if ( !IS_PRIV(current->domain) )
    73.5              break;
    73.6  
    73.7 +        irq = irq_op.irq;
    73.8          ret = -EINVAL;
    73.9 -        if ( (irq = irq_op.irq) >= NR_IRQS )
   73.10 +        if ( (irq < 0) || (irq >= NR_IRQS) )
   73.11              break;
   73.12 -        
   73.13 +
   73.14          irq_op.vector = assign_irq_vector(irq);
   73.15          ret = copy_to_guest(arg, &irq_op, 1) ? -EFAULT : 0;
   73.16          break;
    74.1 --- a/xen/arch/x86/time.c	Fri Sep 01 12:52:12 2006 -0600
    74.2 +++ b/xen/arch/x86/time.c	Fri Sep 01 13:04:02 2006 -0600
    74.3 @@ -676,7 +676,7 @@ static inline void __update_vcpu_system_
    74.4      struct vcpu_time_info *u;
    74.5  
    74.6      t = &this_cpu(cpu_time);
    74.7 -    u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time;
    74.8 +    u = &v->vcpu_info->time;
    74.9  
   74.10      version_update_begin(&u->version);
   74.11  
   74.12 @@ -690,7 +690,7 @@ static inline void __update_vcpu_system_
   74.13  
   74.14  void update_vcpu_system_time(struct vcpu *v)
   74.15  {
   74.16 -    if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != 
   74.17 +    if ( v->vcpu_info->time.tsc_timestamp !=
   74.18           this_cpu(cpu_time).local_tsc_stamp )
   74.19          __update_vcpu_system_time(v);
   74.20  }
    75.1 --- a/xen/arch/x86/traps.c	Fri Sep 01 12:52:12 2006 -0600
    75.2 +++ b/xen/arch/x86/traps.c	Fri Sep 01 13:04:02 2006 -0600
    75.3 @@ -339,7 +339,6 @@ void show_execution_state(struct cpu_use
    75.4  asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
    75.5  {
    75.6      int cpu = smp_processor_id();
    75.7 -    unsigned long cr2;
    75.8      static char *trapstr[] = { 
    75.9          "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", 
   75.10          "invalid opcode", "device not available", "double fault", 
   75.11 @@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s
   75.12  
   75.13      if ( trapnr == TRAP_page_fault )
   75.14      {
   75.15 -        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
   75.16 +        unsigned long cr2 = read_cr2();
   75.17          printk("Faulting linear address: %p\n", _p(cr2));
   75.18          show_page_walk(cr2);
   75.19      }
   75.20 @@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_
   75.21  
   75.22      ASSERT(!in_irq());
   75.23  
   75.24 -    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
   75.25 +    addr = read_cr2();
   75.26  
   75.27      DEBUGGER_trap_entry(TRAP_page_fault, regs);
   75.28  
    76.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Sep 01 12:52:12 2006 -0600
    76.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Sep 01 13:04:02 2006 -0600
    76.3 @@ -21,11 +21,28 @@
    76.4  /* All CPUs have their own IDT to allow int80 direct trap. */
    76.5  idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
    76.6  
    76.7 +static void print_xen_info(void)
    76.8 +{
    76.9 +    char taint_str[TAINT_STRING_MAX_LEN];
   76.10 +    char debug = 'n', *arch = "x86_32";
   76.11 +
   76.12 +#ifndef NDEBUG
   76.13 +    debug = 'y';
   76.14 +#endif
   76.15 +
   76.16 +#ifdef CONFIG_X86_PAE
   76.17 +    arch = "x86_32p";
   76.18 +#endif
   76.19 +
   76.20 +    printk("----[ Xen-%d.%d%s  %s  debug=%c  %s ]----\n",
   76.21 +           xen_major_version(), xen_minor_version(), xen_extra_version(),
   76.22 +           arch, debug, print_tainted(taint_str));
   76.23 +}
   76.24 +
   76.25  void show_registers(struct cpu_user_regs *regs)
   76.26  {
   76.27      struct cpu_user_regs fault_regs = *regs;
   76.28      unsigned long fault_crs[8];
   76.29 -    char taint_str[TAINT_STRING_MAX_LEN];
   76.30      const char *context;
   76.31  
   76.32      if ( hvm_guest(current) && guest_mode(regs) )
   76.33 @@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs
   76.34      }
   76.35      else
   76.36      {
   76.37 -        context = guest_mode(regs) ? "guest" : "hypervisor";
   76.38 -
   76.39          if ( !guest_mode(regs) )
   76.40          {
   76.41 +            context = "hypervisor";
   76.42              fault_regs.esp = (unsigned long)&regs->esp;
   76.43              fault_regs.ss = read_segment_register(ss);
   76.44              fault_regs.ds = read_segment_register(ds);
   76.45              fault_regs.es = read_segment_register(es);
   76.46              fault_regs.fs = read_segment_register(fs);
   76.47              fault_regs.gs = read_segment_register(gs);
   76.48 +            fault_crs[2] = read_cr2();
   76.49 +        }
   76.50 +        else
   76.51 +        {
   76.52 +            context = "guest";
   76.53 +            fault_crs[2] = current->vcpu_info->arch.cr2;
   76.54          }
   76.55  
   76.56          fault_crs[0] = read_cr0();
   76.57          fault_crs[3] = read_cr3();
   76.58 +        fault_crs[4] = read_cr4();
   76.59      }
   76.60  
   76.61 -    printk("----[ Xen-%d.%d%s    %s ]----\n",
   76.62 -           xen_major_version(), xen_minor_version(), xen_extra_version(),
   76.63 -           print_tainted(taint_str));
   76.64 +    print_xen_info();
   76.65      printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
   76.66             smp_processor_id(), fault_regs.cs, fault_regs.eip);
   76.67      if ( !guest_mode(regs) )
   76.68 @@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs
   76.69             fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
   76.70      printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
   76.71             fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
   76.72 -    printk("cr0: %08lx   cr3: %08lx\n", fault_crs[0], fault_crs[3]);
   76.73 +    printk("cr0: %08lx   cr4: %08lx   cr3: %08lx   cr2: %08lx\n",
   76.74 +           fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]);
   76.75      printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
   76.76             "ss: %04x   cs: %04x\n",
   76.77             fault_regs.ds, fault_regs.es, fault_regs.fs,
   76.78 @@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void)
   76.79  {
   76.80      struct tss_struct *tss = &doublefault_tss;
   76.81      unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
   76.82 -    char taint_str[TAINT_STRING_MAX_LEN];
   76.83  
   76.84      watchdog_disable();
   76.85  
   76.86 @@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void)
   76.87  
   76.88      /* Find information saved during fault and dump it to the console. */
   76.89      tss = &init_tss[cpu];
   76.90 -    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
   76.91 -           xen_major_version(), xen_minor_version(), xen_extra_version(),
   76.92 -           print_tainted(taint_str));
   76.93 +    printk("*** DOUBLE FAULT ***\n");
   76.94 +    print_xen_info();
   76.95      printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
   76.96             cpu, tss->cs, tss->eip);
   76.97      print_symbol(" %s\n", tss->eip);
    77.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Sep 01 12:52:12 2006 -0600
    77.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Sep 01 13:04:02 2006 -0600
    77.3 @@ -21,11 +21,24 @@
    77.4  
    77.5  #include <public/callback.h>
    77.6  
    77.7 +static void print_xen_info(void)
    77.8 +{
    77.9 +    char taint_str[TAINT_STRING_MAX_LEN];
   77.10 +    char debug = 'n';
   77.11 +
   77.12 +#ifndef NDEBUG
   77.13 +    debug = 'y';
   77.14 +#endif
   77.15 +
   77.16 +    printk("----[ Xen-%d.%d%s  x86_64  debug=%c  %s ]----\n",
   77.17 +           xen_major_version(), xen_minor_version(), xen_extra_version(),
   77.18 +           debug, print_tainted(taint_str));
   77.19 +}
   77.20 +
   77.21  void show_registers(struct cpu_user_regs *regs)
   77.22  {
   77.23      struct cpu_user_regs fault_regs = *regs;
   77.24      unsigned long fault_crs[8];
   77.25 -    char taint_str[TAINT_STRING_MAX_LEN];
   77.26      const char *context;
   77.27  
   77.28      if ( hvm_guest(current) && guest_mode(regs) )
   77.29 @@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs
   77.30      }
   77.31      else
   77.32      {
   77.33 -        context = guest_mode(regs) ? "guest" : "hypervisor";
   77.34 +        if ( guest_mode(regs) )
   77.35 +        {
   77.36 +            context = "guest";
   77.37 +            fault_crs[2] = current->vcpu_info->arch.cr2;
   77.38 +        }
   77.39 +        else
   77.40 +        {
   77.41 +            context = "hypervisor";
   77.42 +            fault_crs[2] = read_cr2();
   77.43 +        }
   77.44 +
   77.45          fault_crs[0] = read_cr0();
   77.46          fault_crs[3] = read_cr3();
   77.47 +        fault_crs[4] = read_cr4();
   77.48          fault_regs.ds = read_segment_register(ds);
   77.49          fault_regs.es = read_segment_register(es);
   77.50          fault_regs.fs = read_segment_register(fs);
   77.51          fault_regs.gs = read_segment_register(gs);
   77.52      }
   77.53  
   77.54 -    printk("----[ Xen-%d.%d%s    %s ]----\n",
   77.55 -           xen_major_version(), xen_minor_version(), xen_extra_version(),
   77.56 -           print_tainted(taint_str));
   77.57 +    print_xen_info();
   77.58      printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
   77.59             smp_processor_id(), fault_regs.cs, fault_regs.rip);
   77.60      if ( !guest_mode(regs) )
   77.61 @@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs
   77.62             fault_regs.r9,  fault_regs.r10, fault_regs.r11);
   77.63      printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
   77.64             fault_regs.r12, fault_regs.r13, fault_regs.r14);
   77.65 -    printk("r15: %016lx   cr0: %016lx   cr3: %016lx\n",
   77.66 -           fault_regs.r15, fault_crs[0], fault_crs[3]);
   77.67 +    printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
   77.68 +           fault_regs.r15, fault_crs[0], fault_crs[4]);
   77.69 +    printk("cr3: %016lx   cr2: %016lx\n", fault_crs[3], fault_crs[2]);
   77.70      printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
   77.71             "ss: %04x   cs: %04x\n",
   77.72             fault_regs.ds, fault_regs.es, fault_regs.fs,
   77.73 @@ -121,7 +144,6 @@ asmlinkage void double_fault(void);
   77.74  asmlinkage void do_double_fault(struct cpu_user_regs *regs)
   77.75  {
   77.76      unsigned int cpu, tr;
   77.77 -    char taint_str[TAINT_STRING_MAX_LEN];
   77.78  
   77.79      asm ( "str %0" : "=r" (tr) );
   77.80      cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
   77.81 @@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c
   77.82      console_force_unlock();
   77.83  
   77.84      /* Find information saved during fault and dump it to the console. */
   77.85 -    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
   77.86 -           xen_major_version(), xen_minor_version(), xen_extra_version(),
   77.87 -           print_tainted(taint_str));
   77.88 +    printk("*** DOUBLE FAULT ***\n");
   77.89 +    print_xen_info();
   77.90      printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
   77.91             cpu, regs->cs, regs->rip);
   77.92      print_symbol(" %s", regs->rip);
    78.1 --- a/xen/common/perfc.c	Fri Sep 01 12:52:12 2006 -0600
    78.2 +++ b/xen/common/perfc.c	Fri Sep 01 13:04:02 2006 -0600
    78.3 @@ -136,8 +136,8 @@ static xen_sysctl_perfc_desc_t perfc_d[N
    78.4  static xen_sysctl_perfc_val_t *perfc_vals;
    78.5  static int               perfc_nbr_vals;
    78.6  static int               perfc_init = 0;
    78.7 -static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
    78.8 -                           XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
    78.9 +static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc,
   78.10 +                           XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val)
   78.11  {
   78.12      unsigned int i, j;
   78.13      unsigned int v = 0;
    79.1 --- a/xen/include/asm-ia64/mm.h	Fri Sep 01 12:52:12 2006 -0600
    79.2 +++ b/xen/include/asm-ia64/mm.h	Fri Sep 01 13:04:02 2006 -0600
    79.3 @@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev
    79.4  
    79.5  #define INVALID_M2P_ENTRY        (~0UL)
    79.6  #define VALID_M2P(_e)            (!((_e) & (1UL<<63)))
    79.7 -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
    79.8  
    79.9  #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
   79.10  #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
    80.1 --- a/xen/include/asm-powerpc/config.h	Fri Sep 01 12:52:12 2006 -0600
    80.2 +++ b/xen/include/asm-powerpc/config.h	Fri Sep 01 13:04:02 2006 -0600
    80.3 @@ -47,11 +47,13 @@ extern char __bss_start[];
    80.4  /* this should be per processor, but for now */
    80.5  #define CACHE_LINE_SIZE 128
    80.6  
    80.7 +/* 256M - 64M of Xen space seems like a nice number */
    80.8 +#define CONFIG_MIN_DOM0_PAGES (192 << (20 - PAGE_SHIFT))
    80.9  #define CONFIG_SHADOW 1
   80.10  #define CONFIG_GDB 1
   80.11  #define CONFIG_SMP 1
   80.12  #define CONFIG_PCI 1
   80.13 -#define NR_CPUS 1
   80.14 +#define NR_CPUS 16
   80.15  
   80.16  #ifndef ELFSIZE
   80.17  #define ELFSIZE 64
    81.1 --- a/xen/include/asm-powerpc/current.h	Fri Sep 01 12:52:12 2006 -0600
    81.2 +++ b/xen/include/asm-powerpc/current.h	Fri Sep 01 13:04:02 2006 -0600
    81.3 @@ -27,7 +27,7 @@
    81.4  
    81.5  struct vcpu;
    81.6  
    81.7 -register struct processor_area *parea asm("r13");
    81.8 +register volatile struct processor_area *parea asm("r13");
    81.9  
   81.10  static inline struct vcpu *get_current(void)
   81.11  {
   81.12 @@ -66,7 +66,7 @@ static inline struct cpu_user_regs *gues
   81.13  
   81.14  static inline void reset_stack_and_jump(void (*f)(void))
   81.15  {
   81.16 -    void _reset_stack_and_jump(void (*f)(void), struct cpu_user_regs *regs);
   81.17 +    void _reset_stack_and_jump(void (*)(void), struct cpu_user_regs *);
   81.18      struct cpu_user_regs *regs = guest_cpu_user_regs();
   81.19  
   81.20  #ifdef TRACK_RESUME
    82.1 --- a/xen/include/asm-powerpc/domain.h	Fri Sep 01 12:52:12 2006 -0600
    82.2 +++ b/xen/include/asm-powerpc/domain.h	Fri Sep 01 13:04:02 2006 -0600
    82.3 @@ -38,15 +38,14 @@ struct arch_domain {
    82.4      struct page_info *rma_page;
    82.5      uint rma_order;
    82.6  
    82.7 -    /* This is regular memory, only available thru translataion */
    82.8 -    ulong logical_base_pfn;
    82.9 -    ulong logical_end_pfn;
   82.10 +    /* list of extents beyond RMA */
   82.11 +    struct list_head extent_list;
   82.12  
   82.13      /* I/O-port access bitmap mask. */
   82.14      u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
   82.15  
   82.16      uint large_page_sizes;
   82.17 -    char large_page_shift[4];
   82.18 +    uint large_page_order[4];
   82.19  } __cacheline_aligned;
   82.20  
   82.21  struct slb_entry {
    83.1 --- a/xen/include/asm-powerpc/htab.h	Fri Sep 01 12:52:12 2006 -0600
    83.2 +++ b/xen/include/asm-powerpc/htab.h	Fri Sep 01 13:04:02 2006 -0600
    83.3 @@ -133,8 +133,4 @@ struct domain_htab {
    83.4      union pte *map;     /* access the htab like an array */
    83.5      ulong *shadow;      /* idx -> logical translation array */
    83.6  };
    83.7 -
    83.8 -struct domain;
    83.9 -extern void htab_alloc(struct domain *d, uint order);
   83.10 -extern void htab_free(struct domain *d);
   83.11  #endif
    84.1 --- a/xen/include/asm-powerpc/mm.h	Fri Sep 01 12:52:12 2006 -0600
    84.2 +++ b/xen/include/asm-powerpc/mm.h	Fri Sep 01 13:04:02 2006 -0600
    84.3 @@ -24,6 +24,7 @@
    84.4  #include <public/xen.h>
    84.5  #include <xen/list.h>
    84.6  #include <xen/types.h>
    84.7 +#include <xen/mm.h>
    84.8  #include <asm/misc.h>
    84.9  #include <asm/system.h>
   84.10  #include <asm/flushtlb.h>
   84.11 @@ -33,7 +34,6 @@
   84.12  #define memguard_unguard_range(_p,_l)    ((void)0)
   84.13  
   84.14  extern unsigned long xenheap_phys_end;
   84.15 -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
   84.16  
   84.17  /*
   84.18   * Per-page-frame information.
   84.19 @@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end;
   84.20   *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
   84.21   */
   84.22  #define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
   84.23 -#define PRtype_info "016lx"
   84.24  
   84.25  /* XXX copy-and-paste job; re-examine me */
   84.26  struct page_info
   84.27 @@ -63,7 +62,7 @@ struct page_info
   84.28          /* Page is in use: ((count_info & PGC_count_mask) != 0). */
   84.29          struct {
   84.30              /* Owner of this page (NULL if page is anonymous). */
   84.31 -            struct domain *_domain;
   84.32 +            u32 _domain;
   84.33              /* Type reference count and various PGT_xxx flags and fields. */
   84.34              unsigned long type_info;
   84.35          } inuse;
   84.36 @@ -80,80 +79,132 @@ struct page_info
   84.37  
   84.38  };
   84.39  
   84.40 +struct page_extents {
   84.41 +    /* Each frame can be threaded onto a doubly-linked list. */
   84.42 +    struct list_head pe_list;
   84.43 +
   84.44 +    /* page extent */
   84.45 +    struct page_info *pg;
   84.46 +    uint order;
   84.47 +    ulong pfn;
   84.48 +};
   84.49 +
   84.50   /* The following page types are MUTUALLY EXCLUSIVE. */
   84.51  #define PGT_none            (0<<29) /* no special uses of this page */
   84.52 -#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
   84.53 -#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
   84.54 -#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
   84.55 -#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
   84.56 -#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
   84.57 -#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
   84.58 +#define PGT_RMA             (1<<29) /* This page is an RMA page? */
   84.59  #define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
   84.60  #define PGT_type_mask       (7<<29) /* Bits 29-31. */
   84.61 - /* Has this page been validated for use as its current type? */
   84.62 -#define _PGT_validated      28
   84.63 -#define PGT_validated       (1U<<_PGT_validated)
   84.64 +
   84.65   /* Owning guest has pinned this page to its current type? */
   84.66 -#define _PGT_pinned         27
   84.67 +#define _PGT_pinned         28
   84.68  #define PGT_pinned          (1U<<_PGT_pinned)
   84.69 - /* The 10 most significant bits of virt address if this is a page table. */
   84.70 -#define PGT_va_shift        17
   84.71 -#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
   84.72 + /* Has this page been validated for use as its current type? */
   84.73 +#define _PGT_validated      27
   84.74 +#define PGT_validated       (1U<<_PGT_validated)
   84.75 +
   84.76 + /* The 27 most significant bits of virt address if this is a page table. */
   84.77 +#define PGT_va_shift        32
   84.78 +#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
   84.79   /* Is the back pointer still mutable (i.e. not fixed yet)? */
   84.80 -#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
   84.81 +#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
   84.82   /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
   84.83 -#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
   84.84 - /* 17-bit count of uses of this frame as its current type. */
   84.85 -#define PGT_count_mask      ((1U<<17)-1)
   84.86 +#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
   84.87 +
   84.88 + /* 16-bit count of uses of this frame as its current type. */
   84.89 +#define PGT_count_mask      ((1U<<16)-1)
   84.90  
   84.91   /* Cleared when the owning guest 'frees' this page. */
   84.92  #define _PGC_allocated      31
   84.93  #define PGC_allocated       (1U<<_PGC_allocated)
   84.94 - /* 31-bit count of references to this frame. */
   84.95 -#define PGC_count_mask      ((1U<<31)-1)
   84.96 + /* Set on a *guest* page to mark it out-of-sync with its shadow */
   84.97 +#define _PGC_out_of_sync     30
   84.98 +#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
   84.99 + /* Set when is using a page as a page table */
  84.100 +#define _PGC_page_table      29
  84.101 +#define PGC_page_table      (1U<<_PGC_page_table)
  84.102 + /* 29-bit count of references to this frame. */
  84.103 +#define PGC_count_mask      ((1U<<29)-1)
  84.104 +
  84.105 +#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
  84.106 +
  84.107 +static inline struct domain *unpickle_domptr(u32 _domain)
  84.108 +{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
  84.109 +
  84.110 +static inline u32 pickle_domptr(struct domain *domain)
  84.111 +{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
  84.112 +
  84.113 +#define PRtype_info "016lx"/* should only be used for printk's */
  84.114 +
  84.115 +#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
  84.116 +#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
  84.117 +
  84.118 +extern struct page_info *frame_table;
  84.119 +extern unsigned long max_page;
  84.120 +extern unsigned long total_pages;
  84.121 +void init_frametable(void);
  84.122  
  84.123  static inline void put_page(struct page_info *page)
  84.124  {
  84.125 -#if 0
  84.126 -    int count;
  84.127 -
  84.128 -    count = atomic_dec_return(&page->count_info);
  84.129 +    u32 nx, x, y = page->count_info;
  84.130  
  84.131 -    if ( unlikely((count & PGC_count_mask) == 0) )
  84.132 +    do {
  84.133 +        x  = y;
  84.134 +        nx = x - 1;
  84.135 +    }
  84.136 +    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
  84.137 +
  84.138 +    if ( unlikely((nx & PGC_count_mask) == 0) ) {
  84.139 +        panic("about to free page\n");
  84.140          free_domheap_page(page);
  84.141 -#else
  84.142 -    trap();
  84.143 -#endif
  84.144 +    }
  84.145  }
  84.146  
  84.147  static inline int get_page(struct page_info *page,
  84.148                             struct domain *domain)
  84.149  {
  84.150 -#if 0
  84.151 -    int count;
  84.152 -
  84.153 -    count = atomic_inc_return(&page->count_info);
  84.154 +    u32 x, nx, y = page->count_info;
  84.155 +    u32 d, nd = page->u.inuse._domain;
  84.156 +    u32 _domain = pickle_domptr(domain);
  84.157  
  84.158 -    if (((count & PGC_count_mask) == 0) ||      /* Count overflow? */
  84.159 -            ((count & PGC_count_mask) == 1) ||  /* Wasn't allocated? */
  84.160 -            ((page->domain != domain)))         /* Wrong owner? */
  84.161 -    {
  84.162 -        atomic_dec(&page->count_info);
  84.163 -        return 0;
  84.164 +    do {
  84.165 +        x  = y;
  84.166 +        nx = x + 1;
  84.167 +        d  = nd;
  84.168 +        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
  84.169 +             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
  84.170 +             unlikely(d != _domain) )                /* Wrong owner? */
  84.171 +        {
  84.172 +            return 0;
  84.173 +        }
  84.174 +        y = cmpxchg(&page->count_info, x, nx);
  84.175      }
  84.176 +    while ( unlikely(y != x) );
  84.177  
  84.178 -#else
  84.179 -    trap();
  84.180 -#endif
  84.181      return 1;
  84.182  }
  84.183  
  84.184 +extern void put_page_type(struct page_info *page);
  84.185 +extern int  get_page_type(struct page_info *page, unsigned long type);
  84.186 +
  84.187 +static inline void put_page_and_type(struct page_info *page)
  84.188 +{
  84.189 +    put_page_type(page);
  84.190 +    put_page(page);
  84.191 +}
  84.192 +
  84.193  static inline int get_page_and_type(struct page_info *page,
  84.194                                      struct domain *domain,
  84.195 -                                    u32 type)
  84.196 +                                    unsigned long type)
  84.197  {
  84.198 -    trap();
  84.199 -    return 1;
  84.200 +    int rc = get_page(page, domain);
  84.201 +
  84.202 +    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
  84.203 +    {
  84.204 +        put_page(page);
  84.205 +        rc = 0;
  84.206 +    }
  84.207 +
  84.208 +    return rc;
  84.209  }
  84.210  
  84.211  static inline int page_is_removable(struct page_info *page)
  84.212 @@ -161,17 +212,10 @@ static inline int page_is_removable(stru
  84.213      return ((page->count_info & PGC_count_mask) == 1);
  84.214  }
  84.215  
  84.216 -int get_page_type(struct page_info *page, u32 type);
  84.217 -
  84.218  #define set_machinetophys(_mfn, _pfn) (trap(), 0)
  84.219  
  84.220  extern void synchronise_pagetables(unsigned long cpu_mask);
  84.221  
  84.222 -static inline void put_page_and_type(struct page_info *page)
  84.223 -{
  84.224 -    trap();
  84.225 -}
  84.226 -
  84.227  /* XXX don't know what this is for */
  84.228  typedef struct {
  84.229      void (*enable)(struct domain *);
  84.230 @@ -179,17 +223,10 @@ typedef struct {
  84.231  } vm_assist_info_t;
  84.232  extern vm_assist_info_t vm_assist_info[];
  84.233  
  84.234 -#define page_get_owner(_p)    ((_p)->u.inuse._domain)
  84.235 -#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d)
  84.236 -
  84.237  #define share_xen_page_with_guest(p, d, r) do { } while (0)
  84.238  #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
  84.239  
  84.240 -extern struct page_info *frame_table;
  84.241  extern unsigned long frame_table_size;
  84.242 -extern unsigned long max_page;
  84.243 -extern unsigned long total_pages;
  84.244 -void init_frametable(void);
  84.245  
  84.246  /* hope that accesses to this will fail spectacularly */
  84.247  #define machine_to_phys_mapping ((u32 *)-1UL)
  84.248 @@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig
  84.249                                     struct domain *,
  84.250                                     struct vcpu *);
  84.251  
  84.252 -extern void put_page_type(struct page_info *page);
  84.253 +#define PFN_TYPE_RMA 1
  84.254 +#define PFN_TYPE_LOGICAL 2
  84.255 +#define PFN_TYPE_IO 3
  84.256 +#define PFN_TYPE_REMOTE 4
  84.257  
  84.258 -#define PFN_TYPE_RMA 0
  84.259 -#define PFN_TYPE_LOGICAL 1
  84.260 -#define PFN_TYPE_IO 2
  84.261 -extern ulong pfn2mfn(struct domain *d, long mfn, int *type);
  84.262 +extern ulong pfn2mfn(struct domain *d, long pfn, int *type);
  84.263  
  84.264  /* Arch-specific portion of memory_op hypercall. */
  84.265  long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
  84.266 @@ -221,6 +258,10 @@ static inline unsigned long gmfn_to_mfn(
  84.267  
  84.268  #define mfn_to_gmfn(_d, mfn) (mfn)
  84.269  
  84.270 +extern int allocate_rma(struct domain *d, unsigned int order_pages);
  84.271 +extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
  84.272 +extern void free_extents(struct domain *d);
  84.273 +
  84.274  extern int steal_page(struct domain *d, struct page_info *page,
  84.275                          unsigned int memflags);
  84.276  
    85.1 --- a/xen/include/asm-powerpc/powerpc64/procarea.h	Fri Sep 01 12:52:12 2006 -0600
    85.2 +++ b/xen/include/asm-powerpc/powerpc64/procarea.h	Fri Sep 01 13:04:02 2006 -0600
    85.3 @@ -28,6 +28,7 @@ struct gdb_state;
    85.4  
    85.5  struct processor_area
    85.6  {
    85.7 +    unsigned int whoami;
    85.8      struct vcpu *cur_vcpu;
    85.9      void *hyp_stack_base;
   85.10      ulong saved_regs[2];
    86.1 --- a/xen/include/asm-powerpc/processor.h	Fri Sep 01 12:52:12 2006 -0600
    86.2 +++ b/xen/include/asm-powerpc/processor.h	Fri Sep 01 13:04:02 2006 -0600
    86.3 @@ -39,8 +39,11 @@ struct vcpu;
    86.4  struct cpu_user_regs;
    86.5  extern void show_registers(struct cpu_user_regs *);
    86.6  extern void show_execution_state(struct cpu_user_regs *);
    86.7 -extern unsigned int cpu_rma_order(void);
    86.8 -extern void cpu_initialize(void);
    86.9 +extern void show_backtrace(ulong sp, ulong lr, ulong pc);
   86.10 +extern unsigned int cpu_extent_order(void);
   86.11 +extern unsigned int cpu_default_rma_order_pages(void);
   86.12 +extern uint cpu_large_page_orders(uint *sizes, uint max);
   86.13 +extern void cpu_initialize(int cpuid);
   86.14  extern void cpu_init_vcpu(struct vcpu *);
   86.15  extern void save_cpu_sprs(struct vcpu *);
   86.16  extern void load_cpu_sprs(struct vcpu *);
    87.1 --- a/xen/include/asm-powerpc/shadow.h	Fri Sep 01 12:52:12 2006 -0600
    87.2 +++ b/xen/include/asm-powerpc/shadow.h	Fri Sep 01 13:04:02 2006 -0600
    87.3 @@ -13,7 +13,7 @@
    87.4   * along with this program; if not, write to the Free Software
    87.5   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    87.6   *
    87.7 - * Copyright (C) IBM Corp. 2005
    87.8 + * Copyright (C) IBM Corp. 2005, 2006
    87.9   *
   87.10   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
   87.11   */
   87.12 @@ -55,4 +55,18 @@ static inline void mark_dirty(struct dom
   87.13  {
   87.14      return;
   87.15  }
   87.16 +#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
   87.17 +
   87.18 +extern int shadow_domctl(struct domain *d, 
   87.19 +                   xen_domctl_shadow_op_t *sc,
   87.20 +                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
   87.21 +extern unsigned int shadow_teardown(struct domain *d);
   87.22 +extern unsigned int shadow_set_allocation(
   87.23 +    struct domain *d, unsigned int megabytes, int *preempted);
   87.24 +
   87.25 +/* Return the size of the shadow pool, rounded up to the nearest MB */
   87.26 +static inline unsigned int shadow_get_allocation(struct domain *d)
   87.27 +{
   87.28 +    return (1ULL << (d->arch.htab.order + PAGE_SHIFT)) >> 20;
   87.29 +}
   87.30  #endif
    88.1 --- a/xen/include/asm-powerpc/smp.h	Fri Sep 01 12:52:12 2006 -0600
    88.2 +++ b/xen/include/asm-powerpc/smp.h	Fri Sep 01 13:04:02 2006 -0600
    88.3 @@ -28,8 +28,8 @@ extern int smp_num_siblings;
    88.4  
    88.5  /* revisit when we support SMP */
    88.6  #define get_hard_smp_processor_id(i) i
    88.7 -#define hard_smp_processor_id() 0
    88.8 -#define raw_smp_processor_id() 0
    88.9 +#define raw_smp_processor_id() (parea->whoami)
   88.10 +#define hard_smp_processor_id() raw_smp_processor_id()
   88.11  extern cpumask_t cpu_sibling_map[];
   88.12  extern cpumask_t cpu_core_map[];
   88.13  
    89.1 --- a/xen/include/asm-powerpc/types.h	Fri Sep 01 12:52:12 2006 -0600
    89.2 +++ b/xen/include/asm-powerpc/types.h	Fri Sep 01 13:04:02 2006 -0600
    89.3 @@ -3,9 +3,19 @@
    89.4  #ifndef _PPC_TYPES_H
    89.5  #define _PPC_TYPES_H
    89.6  
    89.7 +#include <xen/config.h>
    89.8 +
    89.9 +#if defined(__ppc__)
   89.10 +#define BYTES_PER_LONG 4
   89.11 +#define BITS_PER_LONG 32
   89.12 +#elif defined(__PPC64__)
   89.13 +#define BYTES_PER_LONG 8
   89.14 +#define BITS_PER_LONG 64
   89.15 +#endif
   89.16 +
   89.17 +#ifndef __ASSEMBLY__
   89.18  typedef unsigned short umode_t;
   89.19  
   89.20 -
   89.21  /*
   89.22   * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
   89.23   * header files exported to user space
   89.24 @@ -31,8 +41,6 @@ typedef unsigned long __u64;
   89.25  #endif
   89.26  #endif
   89.27  
   89.28 -#include <xen/config.h>
   89.29 -
   89.30  typedef signed char s8;
   89.31  typedef unsigned char u8;
   89.32  
   89.33 @@ -45,14 +53,10 @@ typedef unsigned int u32;
   89.34  #if defined(__ppc__)
   89.35  typedef signed long long s64;
   89.36  typedef unsigned long long u64;
   89.37 -#define BYTES_PER_LONG 4
   89.38 -#define BITS_PER_LONG 32
   89.39  typedef unsigned int size_t;
   89.40  #elif defined(__PPC64__)
   89.41  typedef signed long s64;
   89.42  typedef unsigned long u64;
   89.43 -#define BYTES_PER_LONG 8
   89.44 -#define BITS_PER_LONG 64
   89.45  typedef unsigned long size_t;
   89.46  #endif
   89.47  
   89.48 @@ -66,4 +70,5 @@ typedef u64 dma64_addr_t;
   89.49  
   89.50  typedef unsigned short xmem_bufctl_t;
   89.51  
   89.52 +#endif  /* __ASSEMBLY__ */
   89.53  #endif
    90.1 --- a/xen/include/asm-x86/mm.h	Fri Sep 01 12:52:12 2006 -0600
    90.2 +++ b/xen/include/asm-x86/mm.h	Fri Sep 01 13:04:02 2006 -0600
    90.3 @@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct 
    90.4  #define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
    90.5  #define INVALID_M2P_ENTRY        (~0UL)
    90.6  #define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
    90.7 -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
    90.8  
    90.9  #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
   90.10  #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
    91.1 --- a/xen/include/asm-x86/page.h	Fri Sep 01 12:52:12 2006 -0600
    91.2 +++ b/xen/include/asm-x86/page.h	Fri Sep 01 13:04:02 2006 -0600
    91.3 @@ -300,13 +300,6 @@ void setup_idle_pagetable(void);
    91.4  #define _PAGE_GNTTAB   0
    91.5  #endif
    91.6  
    91.7 -/*
    91.8 - * Disallow unused flag bits plus PAT, PSE and GLOBAL.
    91.9 - * Also disallow GNTTAB if we are using it for grant-table debugging.
   91.10 - * Permit the NX bit if the hardware supports it.
   91.11 - */
   91.12 -#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX)
   91.13 -
   91.14  #define __PAGE_HYPERVISOR \
   91.15      (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
   91.16  #define __PAGE_HYPERVISOR_NOCACHE \
    92.1 --- a/xen/include/asm-x86/processor.h	Fri Sep 01 12:52:12 2006 -0600
    92.2 +++ b/xen/include/asm-x86/processor.h	Fri Sep 01 13:04:02 2006 -0600
    92.3 @@ -290,6 +290,13 @@ static inline void write_cr0(unsigned lo
    92.4  	__asm__("mov %0,%%cr0": :"r" ((unsigned long)val));
    92.5  }
    92.6  
    92.7 +static inline unsigned long read_cr2(void)
    92.8 +{
    92.9 +    unsigned long __cr2;
   92.10 +    __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2));
   92.11 +    return __cr2;
   92.12 +}
   92.13 +
   92.14  static inline unsigned long read_cr4(void)
   92.15  {
   92.16      unsigned long __cr4;
    93.1 --- a/xen/include/asm-x86/x86_32/page-2level.h	Fri Sep 01 12:52:12 2006 -0600
    93.2 +++ b/xen/include/asm-x86/x86_32/page-2level.h	Fri Sep 01 13:04:02 2006 -0600
    93.3 @@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t;
    93.4  #define get_pte_flags(x) ((int)(x) & 0xFFF)
    93.5  #define put_pte_flags(x) ((intpte_t)((x) & 0xFFF))
    93.6  
    93.7 -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
    93.8 -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
    93.9 -
   93.10  #endif /* __X86_32_PAGE_2LEVEL_H__ */
    94.1 --- a/xen/include/asm-x86/x86_32/page-3level.h	Fri Sep 01 12:52:12 2006 -0600
    94.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Fri Sep 01 13:04:02 2006 -0600
    94.3 @@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t;
    94.4  #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
    94.5  #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
    94.6  
    94.7 -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
    94.8 -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
    94.9  #define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
   94.10  
   94.11  #endif /* __X86_32_PAGE_3LEVEL_H__ */
    95.1 --- a/xen/include/asm-x86/x86_32/page.h	Fri Sep 01 12:52:12 2006 -0600
    95.2 +++ b/xen/include/asm-x86/x86_32/page.h	Fri Sep 01 13:04:02 2006 -0600
    95.3 @@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
    95.4  #define GRANT_PTE_FLAGS \
    95.5      (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
    95.6  
    95.7 +/*
    95.8 + * Disallow unused flag bits plus PAT, PSE and GLOBAL.
    95.9 + * Permit the NX bit if the hardware supports it.
   95.10 + */
   95.11 +#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
   95.12 +
   95.13 +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
   95.14 +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
   95.15 +
   95.16  #endif /* __X86_32_PAGE_H__ */
   95.17  
   95.18  /*
    96.1 --- a/xen/include/asm-x86/x86_64/page.h	Fri Sep 01 12:52:12 2006 -0600
    96.2 +++ b/xen/include/asm-x86/x86_64/page.h	Fri Sep 01 13:04:02 2006 -0600
    96.3 @@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t;
    96.4  #define _PAGE_NX_BIT (1U<<23)
    96.5  #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0U)
    96.6  
    96.7 -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
    96.8 -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
    96.9 +/*
   96.10 + * Disallow unused flag bits plus PAT, PSE and GLOBAL.
   96.11 + * Permit the NX bit if the hardware supports it.
   96.12 + * Note that range [62:52] is available for software use on x86/64.
   96.13 + */
   96.14 +#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX)
   96.15 +
   96.16 +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
   96.17 +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
   96.18  #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
   96.19  #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
   96.20  
    97.1 --- a/xen/include/public/arch-ia64.h	Fri Sep 01 12:52:12 2006 -0600
    97.2 +++ b/xen/include/public/arch-ia64.h	Fri Sep 01 13:04:02 2006 -0600
    97.3 @@ -18,15 +18,12 @@
    97.4  
    97.5  #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
    97.6  #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
    97.7 -#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
    97.8  #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
    97.9  #ifdef __XEN_TOOLS__
   97.10  #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
   97.11  #endif
   97.12  
   97.13  #ifndef __ASSEMBLY__
   97.14 -typedef uint64_t uint64_aligned_t;
   97.15 -
   97.16  /* Guest handles for primitive C types. */
   97.17  __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
   97.18  __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
    98.1 --- a/xen/include/public/arch-powerpc.h	Fri Sep 01 12:52:12 2006 -0600
    98.2 +++ b/xen/include/public/arch-powerpc.h	Fri Sep 01 13:04:02 2006 -0600
    98.3 @@ -29,7 +29,6 @@
    98.4  
    98.5  #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
    98.6  #define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
    98.7 -#define XEN_GUEST_HANDLE_64(name)     __guest_handle_ ## name
    98.8  #define set_xen_guest_handle(hnd, val) \
    98.9      do { \
   98.10          if (sizeof ((hnd).__pad)) \
   98.11 @@ -42,8 +41,6 @@
   98.12  #endif
   98.13  
   98.14  #ifndef __ASSEMBLY__
   98.15 -typedef uint64_t uint64_aligned_t;
   98.16 -
   98.17  /* Guest handles for primitive C types. */
   98.18  __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
   98.19  __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
    99.1 --- a/xen/include/public/arch-x86_32.h	Fri Sep 01 12:52:12 2006 -0600
    99.2 +++ b/xen/include/public/arch-x86_32.h	Fri Sep 01 13:04:02 2006 -0600
    99.3 @@ -28,14 +28,7 @@
    99.4  #endif
    99.5  
    99.6  /* Structural guest handles introduced in 0x00030201. */
    99.7 -#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__)
    99.8 -typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t;
    99.9 -#define __DEFINE_XEN_GUEST_HANDLE(name, type)                   \
   99.10 -    typedef struct { type *p; }                                 \
   99.11 -        __guest_handle_ ## name;                                \
   99.12 -    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
   99.13 -        __guest_handle_64_ ## name
   99.14 -#elif __XEN_INTERFACE_VERSION__ >= 0x00030201
   99.15 +#if __XEN_INTERFACE_VERSION__ >= 0x00030201
   99.16  #define __DEFINE_XEN_GUEST_HANDLE(name, type) \
   99.17      typedef struct { type *p; } __guest_handle_ ## name
   99.18  #else
   99.19 @@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned(
   99.20  
   99.21  #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
   99.22  #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
   99.23 -#define XEN_GUEST_HANDLE_64(name)       __guest_handle_64_ ## name
   99.24 +#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
   99.25  #ifdef __XEN_TOOLS__
   99.26  #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
   99.27 -#define set_xen_guest_handle(hnd, val)                      \
   99.28 -    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
   99.29 -         (hnd).p = val;                                     \
   99.30 -    } while ( 0 )
   99.31 -#else
   99.32 -#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
   99.33  #endif
   99.34  
   99.35  #ifndef __ASSEMBLY__
   100.1 --- a/xen/include/public/arch-x86_64.h	Fri Sep 01 12:52:12 2006 -0600
   100.2 +++ b/xen/include/public/arch-x86_64.h	Fri Sep 01 13:04:02 2006 -0600
   100.3 @@ -39,15 +39,12 @@
   100.4  
   100.5  #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
   100.6  #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
   100.7 -#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
   100.8  #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
   100.9  #ifdef __XEN_TOOLS__
  100.10  #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
  100.11  #endif
  100.12  
  100.13  #ifndef __ASSEMBLY__
  100.14 -typedef uint64_t uint64_aligned_t;
  100.15 -
  100.16  /* Guest handles for primitive C types. */
  100.17  __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
  100.18  __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
   101.1 --- a/xen/include/public/domctl.h	Fri Sep 01 12:52:12 2006 -0600
   101.2 +++ b/xen/include/public/domctl.h	Fri Sep 01 13:04:02 2006 -0600
   101.3 @@ -16,12 +16,10 @@
   101.4  
   101.5  #include "xen.h"
   101.6  
   101.7 -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000001
   101.8 -
   101.9 -#define uint64_t uint64_aligned_t
  101.10 +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003
  101.11  
  101.12  struct xenctl_cpumap {
  101.13 -    XEN_GUEST_HANDLE_64(uint8_t) bitmap;
  101.14 +    XEN_GUEST_HANDLE(uint8_t) bitmap;
  101.15      uint32_t nr_cpus;
  101.16  };
  101.17  
  101.18 @@ -72,8 +70,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdo
  101.19  #define XEN_DOMCTL_getmemlist         6
  101.20  struct xen_domctl_getmemlist {
  101.21      /* IN variables. */
  101.22 +    /* Max entries to write to output buffer. */
  101.23      uint64_t max_pfns;
  101.24 -    XEN_GUEST_HANDLE_64(ulong) buffer;
  101.25 +    /* Start index in guest's page list. */
  101.26 +    uint64_t start_pfn;
  101.27 +    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
  101.28      /* OUT variables. */
  101.29      uint64_t num_pfns;
  101.30  };
  101.31 @@ -110,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 {
  101.32      /* IN variables. */
  101.33      uint64_t num;
  101.34      /* IN/OUT variables. */
  101.35 -    XEN_GUEST_HANDLE_64(ulong) array;
  101.36 +    XEN_GUEST_HANDLE(ulong) array;
  101.37  };
  101.38  typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
  101.39  DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
  101.40 @@ -184,7 +185,7 @@ struct xen_domctl_shadow_op {
  101.41      uint32_t       mb;       /* Shadow memory allocation in MB */
  101.42  
  101.43      /* OP_PEEK / OP_CLEAN */
  101.44 -    XEN_GUEST_HANDLE_64(ulong) dirty_bitmap;
  101.45 +    XEN_GUEST_HANDLE(ulong) dirty_bitmap;
  101.46      uint64_t       pages;    /* Size of buffer. Updated with actual size. */
  101.47      struct xen_domctl_shadow_op_stats stats;
  101.48  };
  101.49 @@ -204,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m
  101.50  #define XEN_DOMCTL_setvcpucontext    12
  101.51  #define XEN_DOMCTL_getvcpucontext    13
  101.52  struct xen_domctl_vcpucontext {
  101.53 -    uint32_t              vcpu;                     /* IN */
  101.54 -    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
  101.55 +    uint32_t              vcpu;                  /* IN */
  101.56 +    XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */
  101.57  };
  101.58  typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
  101.59  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
  101.60 @@ -378,8 +379,6 @@ struct xen_domctl {
  101.61  typedef struct xen_domctl xen_domctl_t;
  101.62  DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
  101.63  
  101.64 -#undef uint64_t
  101.65 -
  101.66  #endif /* __XEN_PUBLIC_DOMCTL_H__ */
  101.67  
  101.68  /*
   102.1 --- a/xen/include/public/sysctl.h	Fri Sep 01 12:52:12 2006 -0600
   102.2 +++ b/xen/include/public/sysctl.h	Fri Sep 01 13:04:02 2006 -0600
   102.3 @@ -16,9 +16,7 @@
   102.4  #include "xen.h"
   102.5  #include "domctl.h"
   102.6  
   102.7 -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001
   102.8 -
   102.9 -#define uint64_t uint64_aligned_t
  102.10 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
  102.11  
  102.12  /*
  102.13   * Read console content from Xen buffer ring.
  102.14 @@ -26,8 +24,8 @@
  102.15  #define XEN_SYSCTL_readconsole       1
  102.16  struct xen_sysctl_readconsole {
  102.17      /* IN variables. */
  102.18 -    uint32_t clear;                   /* Non-zero -> clear after reading. */
  102.19 -    XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */
  102.20 +    uint32_t clear;                /* Non-zero -> clear after reading. */
  102.21 +    XEN_GUEST_HANDLE(char) buffer; /* Buffer start */
  102.22      /* IN/OUT variables. */
  102.23      uint32_t count;            /* In: Buffer size;  Out: Used buffer size  */
  102.24  };
  102.25 @@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op {
  102.26      uint32_t       nr_counters;       /*  number of counters description  */
  102.27      uint32_t       nr_vals;			  /*  number of values  */
  102.28      /* counter information (or NULL) */
  102.29 -    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
  102.30 +    XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
  102.31      /* counter values (or NULL) */
  102.32 -    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
  102.33 +    XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val;
  102.34  };
  102.35  typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
  102.36  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
  102.37 @@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist {
  102.38      /* IN variables. */
  102.39      domid_t               first_domain;
  102.40      uint32_t              max_domains;
  102.41 -    XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
  102.42 +    XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer;
  102.43      /* OUT variables. */
  102.44      uint32_t              num_domains;
  102.45  };
  102.46 @@ -140,8 +138,6 @@ struct xen_sysctl {
  102.47  typedef struct xen_sysctl xen_sysctl_t;
  102.48  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
  102.49  
  102.50 -#undef uint64_t
  102.51 -
  102.52  #endif /* __XEN_PUBLIC_SYSCTL_H__ */
  102.53  
  102.54  /*
   103.1 --- a/xen/include/public/xen.h	Fri Sep 01 12:52:12 2006 -0600
   103.2 +++ b/xen/include/public/xen.h	Fri Sep 01 13:04:02 2006 -0600
   103.3 @@ -63,6 +63,7 @@
   103.4  #define __HYPERVISOR_hvm_op               34
   103.5  #define __HYPERVISOR_sysctl               35
   103.6  #define __HYPERVISOR_domctl               36
   103.7 +#define __HYPERVISOR_kexec_op             37
   103.8  
   103.9  /* Architecture-specific hypercall definitions. */
  103.10  #define __HYPERVISOR_arch_0               48