ia64/xen-unstable

changeset 12002:3e26719ab827

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Wed Oct 25 12:12:01 2006 -0600 (2006-10-25)
parents d246b79986d1 abee5c6b930d
children fd1d742487f8
files unmodified_drivers/linux-2.6/platform-pci/Kbuild unmodified_drivers/linux-2.6/platform-pci/platform-pci.c xen/arch/ia64/vmx/mmio.c xen/arch/ia64/vmx/vmx_init.c xen/include/asm-ia64/vmx_vcpu.h xen/include/asm-x86/hvm/vpit.h xen/include/public/arch-ia64.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Tue Oct 24 11:21:48 2006 -0600
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Wed Oct 25 12:12:01 2006 -0600
     1.3 @@ -392,10 +392,15 @@ static void dispatch_rw_block_io(blkif_t
     1.4  	for (i = 0; i < nseg; i++) {
     1.5  		if (unlikely(map[i].status != 0)) {
     1.6  			DPRINTK("invalid buffer -- could not remap it\n");
     1.7 -			goto fail_flush;
     1.8 +			map[i].handle = BLKBACK_INVALID_HANDLE;
     1.9 +			ret |= 1;
    1.10  		}
    1.11  
    1.12  		pending_handle(pending_req, i) = map[i].handle;
    1.13 +
    1.14 +		if (ret)
    1.15 +			continue;
    1.16 +
    1.17  		set_phys_to_machine(__pa(vaddr(
    1.18  			pending_req, i)) >> PAGE_SHIFT,
    1.19  			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
    1.20 @@ -403,6 +408,9 @@ static void dispatch_rw_block_io(blkif_t
    1.21  			(req->seg[i].first_sect << 9);
    1.22  	}
    1.23  
    1.24 +	if (ret)
    1.25 +		goto fail_flush;
    1.26 +
    1.27  	if (vbd_translate(&preq, blkif, operation) != 0) {
    1.28  		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
    1.29  			operation == READ ? "read" : "write",
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Tue Oct 24 11:21:48 2006 -0600
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Oct 25 12:12:01 2006 -0600
     2.3 @@ -48,6 +48,10 @@
     2.4  #include <asm/hypervisor.h>
     2.5  #include <asm/maddr.h>
     2.6  
     2.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     2.8 +#include <xen/platform-compat.h>
     2.9 +#endif
    2.10 +
    2.11  #define BLKIF_STATE_DISCONNECTED 0
    2.12  #define BLKIF_STATE_CONNECTED    1
    2.13  #define BLKIF_STATE_SUSPENDED    2
    2.14 @@ -468,6 +472,27 @@ int blkif_ioctl(struct inode *inode, str
    2.15  		      command, (long)argument, inode->i_rdev);
    2.16  
    2.17  	switch (command) {
    2.18 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
    2.19 +	case HDIO_GETGEO: {
    2.20 +		struct block_device *bd = inode->i_bdev;
    2.21 +		struct hd_geometry geo;
    2.22 +		int ret;
    2.23 +
    2.24 +                if (!argument)
    2.25 +                        return -EINVAL;
    2.26 +
    2.27 +		geo.start = get_start_sect(bd);
    2.28 +		ret = blkif_getgeo(bd, &geo);
    2.29 +		if (ret)
    2.30 +			return ret;
    2.31 +
    2.32 +		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
    2.33 +				 sizeof(geo)))
    2.34 +                        return -EFAULT;
    2.35 +
    2.36 +                return 0;
    2.37 +	}
    2.38 +#endif
    2.39  	case CDROMMULTISESSION:
    2.40  		DPRINTK("FIXME: support multisession CDs later\n");
    2.41  		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Tue Oct 24 11:21:48 2006 -0600
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Wed Oct 25 12:12:01 2006 -0600
     3.3 @@ -36,6 +36,10 @@
     3.4  #include <linux/blkdev.h>
     3.5  #include <linux/list.h>
     3.6  
     3.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     3.8 +#include <xen/platform-compat.h>
     3.9 +#endif
    3.10 +
    3.11  #define BLKIF_MAJOR(dev) ((dev)>>8)
    3.12  #define BLKIF_MINOR(dev) ((dev) & 0xff)
    3.13  
    3.14 @@ -91,7 +95,9 @@ static struct block_device_operations xl
    3.15  	.open = blkif_open,
    3.16  	.release = blkif_release,
    3.17  	.ioctl  = blkif_ioctl,
    3.18 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
    3.19  	.getgeo = blkif_getgeo
    3.20 +#endif
    3.21  };
    3.22  
    3.23  DEFINE_SPINLOCK(blkif_io_lock);
    3.24 @@ -186,7 +192,11 @@ xlvbd_init_blk_queue(struct gendisk *gd,
    3.25  	if (rq == NULL)
    3.26  		return -1;
    3.27  
    3.28 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
    3.29  	elevator_init(rq, "noop");
    3.30 +#else
    3.31 +	elevator_init(rq, &elevator_noop);
    3.32 +#endif
    3.33  
    3.34  	/* Hard sector size and max sectors impersonate the equiv. hardware. */
    3.35  	blk_queue_hardsect_size(rq, sector_size);
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/features.c	Tue Oct 24 11:21:48 2006 -0600
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c	Wed Oct 25 12:12:01 2006 -0600
     4.3 @@ -11,6 +11,10 @@
     4.4  #include <asm/hypervisor.h>
     4.5  #include <xen/features.h>
     4.6  
     4.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     4.8 +#include <xen/platform-compat.h>
     4.9 +#endif
    4.10 +
    4.11  u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
    4.12  /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
    4.13  EXPORT_SYMBOL(xen_features);
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Tue Oct 24 11:21:48 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed Oct 25 12:12:01 2006 -0600
     5.3 @@ -44,6 +44,10 @@
     5.4  #include <asm/io.h>
     5.5  #include <xen/interface/memory.h>
     5.6  
     5.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     5.8 +#include <xen/platform-compat.h>
     5.9 +#endif
    5.10 +
    5.11  /* External tools reserve first few grant table entries. */
    5.12  #define NR_RESERVED_ENTRIES 8
    5.13  
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Tue Oct 24 11:21:48 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Oct 25 12:12:01 2006 -0600
     6.3 @@ -64,6 +64,10 @@
     6.4  #include <xen/interface/grant_table.h>
     6.5  #include <xen/gnttab.h>
     6.6  
     6.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     6.8 +#include <xen/platform-compat.h>
     6.9 +#endif
    6.10 +
    6.11  /*
    6.12   * Mutually-exclusive module options to select receive data path:
    6.13   *  rx_copy : Packets are copied by network backend into local memory
     7.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile	Tue Oct 24 11:21:48 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile	Wed Oct 25 12:12:01 2006 -0600
     7.3 @@ -9,4 +9,5 @@ xenbus-objs += xenbus_client.o
     7.4  xenbus-objs += xenbus_comms.o
     7.5  xenbus-objs += xenbus_xs.o
     7.6  xenbus-objs += xenbus_probe.o
     7.7 +obj-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
     7.8  obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Tue Oct 24 11:21:48 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Wed Oct 25 12:12:01 2006 -0600
     8.3 @@ -35,6 +35,10 @@
     8.4  #include <xen/xenbus.h>
     8.5  #include <xen/driver_util.h>
     8.6  
     8.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     8.8 +#include <xen/platform-compat.h>
     8.9 +#endif
    8.10 +
    8.11  /* xenbus_probe.c */
    8.12  extern char *kasprintf(const char *fmt, ...);
    8.13  
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c	Tue Oct 24 11:21:48 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c	Wed Oct 25 12:12:01 2006 -0600
     9.3 @@ -39,6 +39,10 @@
     9.4  #include <xen/xenbus.h>
     9.5  #include "xenbus_comms.h"
     9.6  
     9.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
     9.8 +#include <xen/platform-compat.h>
     9.9 +#endif
    9.10 +
    9.11  static int xenbus_irq;
    9.12  
    9.13  extern void xenbus_probe(void *);
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Tue Oct 24 11:21:48 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Wed Oct 25 12:12:01 2006 -0600
    10.3 @@ -40,6 +40,7 @@
    10.4  #include <linux/wait.h>
    10.5  #include <linux/fs.h>
    10.6  #include <linux/poll.h>
    10.7 +#include <linux/mutex.h>
    10.8  
    10.9  #include "xenbus_comms.h"
   10.10  
   10.11 @@ -49,6 +50,10 @@
   10.12  #include <xen/xen_proc.h>
   10.13  #include <asm/hypervisor.h>
   10.14  
   10.15 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
   10.16 +#include <xen/platform-compat.h>
   10.17 +#endif
   10.18 +
   10.19  struct xenbus_dev_transaction {
   10.20  	struct list_head list;
   10.21  	struct xenbus_transaction handle;
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Tue Oct 24 11:21:48 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Wed Oct 25 12:12:01 2006 -0600
    11.3 @@ -42,6 +42,7 @@
    11.4  #include <linux/mm.h>
    11.5  #include <linux/notifier.h>
    11.6  #include <linux/kthread.h>
    11.7 +#include <linux/mutex.h>
    11.8  
    11.9  #include <asm/io.h>
   11.10  #include <asm/page.h>
   11.11 @@ -55,6 +56,11 @@
   11.12  #include <xen/hvm.h>
   11.13  
   11.14  #include "xenbus_comms.h"
   11.15 +#include "xenbus_probe.h"
   11.16 +
   11.17 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
   11.18 +#include <xen/platform-compat.h>
   11.19 +#endif
   11.20  
   11.21  int xen_store_evtchn;
   11.22  struct xenstore_domain_interface *xen_store_interface;
   11.23 @@ -67,12 +73,7 @@ static struct notifier_block *xenstore_c
   11.24  static void wait_for_devices(struct xenbus_driver *xendrv);
   11.25  
   11.26  static int xenbus_probe_frontend(const char *type, const char *name);
   11.27 -static int xenbus_uevent_backend(struct device *dev, char **envp,
   11.28 -				 int num_envp, char *buffer, int buffer_size);
   11.29 -static int xenbus_probe_backend(const char *type, const char *domid);
   11.30  
   11.31 -static int xenbus_dev_probe(struct device *_dev);
   11.32 -static int xenbus_dev_remove(struct device *_dev);
   11.33  static void xenbus_dev_shutdown(struct device *_dev);
   11.34  
   11.35  /* If something in array of ids matches this device, return it. */
   11.36 @@ -86,7 +87,7 @@ match_device(const struct xenbus_device_
   11.37  	return NULL;
   11.38  }
   11.39  
   11.40 -static int xenbus_match(struct device *_dev, struct device_driver *_drv)
   11.41 +int xenbus_match(struct device *_dev, struct device_driver *_drv)
   11.42  {
   11.43  	struct xenbus_driver *drv = to_xenbus_driver(_drv);
   11.44  
   11.45 @@ -96,17 +97,6 @@ static int xenbus_match(struct device *_
   11.46  	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
   11.47  }
   11.48  
   11.49 -struct xen_bus_type
   11.50 -{
   11.51 -	char *root;
   11.52 -	unsigned int levels;
   11.53 -	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
   11.54 -	int (*probe)(const char *type, const char *dir);
   11.55 -	struct bus_type bus;
   11.56 -	struct device dev;
   11.57 -};
   11.58 -
   11.59 -
   11.60  /* device/<type>/<id> => <type>-<id> */
   11.61  static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
   11.62  {
   11.63 @@ -143,7 +133,7 @@ static void free_otherend_watch(struct x
   11.64  }
   11.65  
   11.66  
   11.67 -static int read_otherend_details(struct xenbus_device *xendev,
   11.68 +int read_otherend_details(struct xenbus_device *xendev,
   11.69  				 char *id_node, char *path_node)
   11.70  {
   11.71  	int err = xenbus_gather(XBT_NIL, xendev->nodename,
   11.72 @@ -176,12 +166,6 @@ static int read_backend_details(struct x
   11.73  }
   11.74  
   11.75  
   11.76 -static int read_frontend_details(struct xenbus_device *xendev)
   11.77 -{
   11.78 -	return read_otherend_details(xendev, "frontend-id", "frontend");
   11.79 -}
   11.80 -
   11.81 -
   11.82  /* Bus type for frontend drivers. */
   11.83  static struct xen_bus_type xenbus_frontend = {
   11.84  	.root = "device",
   11.85 @@ -191,115 +175,17 @@ static struct xen_bus_type xenbus_fronte
   11.86  	.bus = {
   11.87  		.name     = "xen",
   11.88  		.match    = xenbus_match,
   11.89 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
   11.90  		.probe    = xenbus_dev_probe,
   11.91  		.remove   = xenbus_dev_remove,
   11.92  		.shutdown = xenbus_dev_shutdown,
   11.93 +#endif
   11.94  	},
   11.95  	.dev = {
   11.96  		.bus_id = "xen",
   11.97  	},
   11.98  };
   11.99  
  11.100 -/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
  11.101 -static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
  11.102 -{
  11.103 -	int domid, err;
  11.104 -	const char *devid, *type, *frontend;
  11.105 -	unsigned int typelen;
  11.106 -
  11.107 -	type = strchr(nodename, '/');
  11.108 -	if (!type)
  11.109 -		return -EINVAL;
  11.110 -	type++;
  11.111 -	typelen = strcspn(type, "/");
  11.112 -	if (!typelen || type[typelen] != '/')
  11.113 -		return -EINVAL;
  11.114 -
  11.115 -	devid = strrchr(nodename, '/') + 1;
  11.116 -
  11.117 -	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
  11.118 -			    "frontend", NULL, &frontend,
  11.119 -			    NULL);
  11.120 -	if (err)
  11.121 -		return err;
  11.122 -	if (strlen(frontend) == 0)
  11.123 -		err = -ERANGE;
  11.124 -	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
  11.125 -		err = -ENOENT;
  11.126 -
  11.127 -	kfree(frontend);
  11.128 -
  11.129 -	if (err)
  11.130 -		return err;
  11.131 -
  11.132 -	if (snprintf(bus_id, BUS_ID_SIZE,
  11.133 -		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
  11.134 -		return -ENOSPC;
  11.135 -	return 0;
  11.136 -}
  11.137 -
  11.138 -static struct xen_bus_type xenbus_backend = {
  11.139 -	.root = "backend",
  11.140 -	.levels = 3, 		/* backend/type/<frontend>/<id> */
  11.141 -	.get_bus_id = backend_bus_id,
  11.142 -	.probe = xenbus_probe_backend,
  11.143 -	.bus = {
  11.144 -		.name     = "xen-backend",
  11.145 -		.match    = xenbus_match,
  11.146 -		.probe    = xenbus_dev_probe,
  11.147 -		.remove   = xenbus_dev_remove,
  11.148 -//		.shutdown = xenbus_dev_shutdown,
  11.149 -		.uevent   = xenbus_uevent_backend,
  11.150 -	},
  11.151 -	.dev = {
  11.152 -		.bus_id = "xen-backend",
  11.153 -	},
  11.154 -};
  11.155 -
  11.156 -static int xenbus_uevent_backend(struct device *dev, char **envp,
  11.157 -				 int num_envp, char *buffer, int buffer_size)
  11.158 -{
  11.159 -	struct xenbus_device *xdev;
  11.160 -	struct xenbus_driver *drv;
  11.161 -	int i = 0;
  11.162 -	int length = 0;
  11.163 -
  11.164 -	DPRINTK("");
  11.165 -
  11.166 -	if (dev == NULL)
  11.167 -		return -ENODEV;
  11.168 -
  11.169 -	xdev = to_xenbus_device(dev);
  11.170 -	if (xdev == NULL)
  11.171 -		return -ENODEV;
  11.172 -
  11.173 -	/* stuff we want to pass to /sbin/hotplug */
  11.174 -	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  11.175 -		       "XENBUS_TYPE=%s", xdev->devicetype);
  11.176 -
  11.177 -	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  11.178 -		       "XENBUS_PATH=%s", xdev->nodename);
  11.179 -
  11.180 -	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  11.181 -		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
  11.182 -
  11.183 -	/* terminate, set to next free slot, shrink available space */
  11.184 -	envp[i] = NULL;
  11.185 -	envp = &envp[i];
  11.186 -	num_envp -= i;
  11.187 -	buffer = &buffer[length];
  11.188 -	buffer_size -= length;
  11.189 -
  11.190 -	if (dev->driver) {
  11.191 -		drv = to_xenbus_driver(dev->driver);
  11.192 -		if (drv && drv->uevent)
  11.193 -			return drv->uevent(xdev, envp, num_envp, buffer,
  11.194 -					   buffer_size);
  11.195 -	}
  11.196 -
  11.197 -	return 0;
  11.198 -}
  11.199 -
  11.200  static void otherend_changed(struct xenbus_watch *watch,
  11.201  			     const char **vec, unsigned int len)
  11.202  {
  11.203 @@ -359,7 +245,7 @@ static int watch_otherend(struct xenbus_
  11.204  }
  11.205  
  11.206  
  11.207 -static int xenbus_dev_probe(struct device *_dev)
  11.208 +int xenbus_dev_probe(struct device *_dev)
  11.209  {
  11.210  	struct xenbus_device *dev = to_xenbus_device(_dev);
  11.211  	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
  11.212 @@ -406,7 +292,7 @@ fail:
  11.213  	return -ENODEV;
  11.214  }
  11.215  
  11.216 -static int xenbus_dev_remove(struct device *_dev)
  11.217 +int xenbus_dev_remove(struct device *_dev)
  11.218  {
  11.219  	struct xenbus_device *dev = to_xenbus_device(_dev);
  11.220  	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
  11.221 @@ -444,14 +330,21 @@ static void xenbus_dev_shutdown(struct d
  11.222  	put_device(&dev->dev);
  11.223  }
  11.224  
  11.225 -static int xenbus_register_driver_common(struct xenbus_driver *drv,
  11.226 -					 struct xen_bus_type *bus)
  11.227 +int xenbus_register_driver_common(struct xenbus_driver *drv,
  11.228 +				  struct xen_bus_type *bus)
  11.229  {
  11.230  	int ret;
  11.231  
  11.232  	drv->driver.name = drv->name;
  11.233  	drv->driver.bus = &bus->bus;
  11.234 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
  11.235  	drv->driver.owner = drv->owner;
  11.236 +#endif
  11.237 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
  11.238 +	drv->driver.probe = xenbus_dev_probe;
  11.239 +	drv->driver.remove = xenbus_dev_remove;
  11.240 +	drv->driver.shutdown = xenbus_dev_shutdown;
  11.241 +#endif
  11.242  
  11.243  	mutex_lock(&xenwatch_mutex);
  11.244  	ret = driver_register(&drv->driver);
  11.245 @@ -476,14 +369,6 @@ int xenbus_register_frontend(struct xenb
  11.246  }
  11.247  EXPORT_SYMBOL_GPL(xenbus_register_frontend);
  11.248  
  11.249 -int xenbus_register_backend(struct xenbus_driver *drv)
  11.250 -{
  11.251 -	drv->read_otherend_details = read_frontend_details;
  11.252 -
  11.253 -	return xenbus_register_driver_common(drv, &xenbus_backend);
  11.254 -}
  11.255 -EXPORT_SYMBOL_GPL(xenbus_register_backend);
  11.256 -
  11.257  void xenbus_unregister_driver(struct xenbus_driver *drv)
  11.258  {
  11.259  	driver_unregister(&drv->driver);
  11.260 @@ -581,23 +466,29 @@ char *kasprintf(const char *fmt, ...)
  11.261  }
  11.262  
  11.263  static ssize_t xendev_show_nodename(struct device *dev,
  11.264 -				    struct device_attribute *attr, char *buf)
  11.265 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
  11.266 +				    struct device_attribute *attr,
  11.267 +#endif
  11.268 +				    char *buf)
  11.269  {
  11.270  	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
  11.271  }
  11.272  DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
  11.273  
  11.274  static ssize_t xendev_show_devtype(struct device *dev,
  11.275 -				   struct device_attribute *attr, char *buf)
  11.276 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
  11.277 +				   struct device_attribute *attr,
  11.278 +#endif
  11.279 +				   char *buf)
  11.280  {
  11.281  	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
  11.282  }
  11.283  DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
  11.284  
  11.285  
  11.286 -static int xenbus_probe_node(struct xen_bus_type *bus,
  11.287 -			     const char *type,
  11.288 -			     const char *nodename)
  11.289 +int xenbus_probe_node(struct xen_bus_type *bus,
  11.290 +		      const char *type,
  11.291 +		      const char *nodename)
  11.292  {
  11.293  	int err;
  11.294  	struct xenbus_device *xendev;
  11.295 @@ -667,55 +558,6 @@ static int xenbus_probe_frontend(const c
  11.296  	return err;
  11.297  }
  11.298  
  11.299 -/* backend/<typename>/<frontend-uuid>/<name> */
  11.300 -static int xenbus_probe_backend_unit(const char *dir,
  11.301 -				     const char *type,
  11.302 -				     const char *name)
  11.303 -{
  11.304 -	char *nodename;
  11.305 -	int err;
  11.306 -
  11.307 -	nodename = kasprintf("%s/%s", dir, name);
  11.308 -	if (!nodename)
  11.309 -		return -ENOMEM;
  11.310 -
  11.311 -	DPRINTK("%s\n", nodename);
  11.312 -
  11.313 -	err = xenbus_probe_node(&xenbus_backend, type, nodename);
  11.314 -	kfree(nodename);
  11.315 -	return err;
  11.316 -}
  11.317 -
  11.318 -/* backend/<typename>/<frontend-domid> */
  11.319 -static int xenbus_probe_backend(const char *type, const char *domid)
  11.320 -{
  11.321 -	char *nodename;
  11.322 -	int err = 0;
  11.323 -	char **dir;
  11.324 -	unsigned int i, dir_n = 0;
  11.325 -
  11.326 -	DPRINTK("");
  11.327 -
  11.328 -	nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
  11.329 -	if (!nodename)
  11.330 -		return -ENOMEM;
  11.331 -
  11.332 -	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
  11.333 -	if (IS_ERR(dir)) {
  11.334 -		kfree(nodename);
  11.335 -		return PTR_ERR(dir);
  11.336 -	}
  11.337 -
  11.338 -	for (i = 0; i < dir_n; i++) {
  11.339 -		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
  11.340 -		if (err)
  11.341 -			break;
  11.342 -	}
  11.343 -	kfree(dir);
  11.344 -	kfree(nodename);
  11.345 -	return err;
  11.346 -}
  11.347 -
  11.348  static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
  11.349  {
  11.350  	int err = 0;
  11.351 @@ -736,7 +578,7 @@ static int xenbus_probe_device_type(stru
  11.352  	return err;
  11.353  }
  11.354  
  11.355 -static int xenbus_probe_devices(struct xen_bus_type *bus)
  11.356 +int xenbus_probe_devices(struct xen_bus_type *bus)
  11.357  {
  11.358  	int err = 0;
  11.359  	char **dir;
  11.360 @@ -778,7 +620,7 @@ static int strsep_len(const char *str, c
  11.361  	return (len == 0) ? i : -ERANGE;
  11.362  }
  11.363  
  11.364 -static void dev_changed(const char *node, struct xen_bus_type *bus)
  11.365 +void dev_changed(const char *node, struct xen_bus_type *bus)
  11.366  {
  11.367  	int exists, rootlen;
  11.368  	struct xenbus_device *dev;
  11.369 @@ -823,25 +665,12 @@ static void frontend_changed(struct xenb
  11.370  	dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
  11.371  }
  11.372  
  11.373 -static void backend_changed(struct xenbus_watch *watch,
  11.374 -			    const char **vec, unsigned int len)
  11.375 -{
  11.376 -	DPRINTK("");
  11.377 -
  11.378 -	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
  11.379 -}
  11.380 -
  11.381  /* We watch for devices appearing and vanishing. */
  11.382  static struct xenbus_watch fe_watch = {
  11.383  	.node = "device",
  11.384  	.callback = frontend_changed,
  11.385  };
  11.386  
  11.387 -static struct xenbus_watch be_watch = {
  11.388 -	.node = "backend",
  11.389 -	.callback = backend_changed,
  11.390 -};
  11.391 -
  11.392  static int suspend_dev(struct device *dev, void *data)
  11.393  {
  11.394  	int err = 0;
  11.395 @@ -912,7 +741,7 @@ void xenbus_suspend(void)
  11.396  	DPRINTK("");
  11.397  
  11.398  	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
  11.399 -	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
  11.400 +	xenbus_backend_suspend(suspend_dev);
  11.401  	xs_suspend();
  11.402  }
  11.403  EXPORT_SYMBOL_GPL(xenbus_suspend);
  11.404 @@ -922,7 +751,7 @@ void xenbus_resume(void)
  11.405  	xb_init_comms();
  11.406  	xs_resume();
  11.407  	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
  11.408 -	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
  11.409 +	xenbus_backend_resume(resume_dev);
  11.410  }
  11.411  EXPORT_SYMBOL_GPL(xenbus_resume);
  11.412  
  11.413 @@ -955,20 +784,17 @@ void xenbus_probe(void *unused)
  11.414  {
  11.415  	BUG_ON((xenstored_ready <= 0));
  11.416  
  11.417 -	/* Enumerate devices in xenstore. */
  11.418 +	/* Enumerate devices in xenstore and watch for changes. */
  11.419  	xenbus_probe_devices(&xenbus_frontend);
  11.420 -	xenbus_probe_devices(&xenbus_backend);
  11.421 -
  11.422 -	/* Watch for changes. */
  11.423  	register_xenbus_watch(&fe_watch);
  11.424 -	register_xenbus_watch(&be_watch);
  11.425 +	xenbus_backend_probe_and_watch();
  11.426  
  11.427  	/* Notify others that xenstore is up */
  11.428  	notifier_call_chain(&xenstore_chain, 0, NULL);
  11.429  }
  11.430  
  11.431  
  11.432 -#ifdef CONFIG_PROC_FS
  11.433 +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
  11.434  static struct file_operations xsd_kva_fops;
  11.435  static struct proc_dir_entry *xsd_kva_intf;
  11.436  static struct proc_dir_entry *xsd_port_intf;
  11.437 @@ -1020,7 +846,7 @@ static int __init xenbus_probe_init(void
  11.438  
  11.439  	/* Register ourselves with the kernel bus subsystem */
  11.440  	bus_register(&xenbus_frontend.bus);
  11.441 -	bus_register(&xenbus_backend.bus);
  11.442 +	xenbus_backend_bus_register();
  11.443  
  11.444  	/*
  11.445  	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
  11.446 @@ -1049,7 +875,7 @@ static int __init xenbus_probe_init(void
  11.447  		xen_store_evtchn = xen_start_info->store_evtchn =
  11.448  			alloc_unbound.port;
  11.449  
  11.450 -#ifdef CONFIG_PROC_FS
  11.451 +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
  11.452  		/* And finally publish the above info in /proc/xen */
  11.453  		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
  11.454  		if (xsd_kva_intf) {
  11.455 @@ -1091,7 +917,7 @@ static int __init xenbus_probe_init(void
  11.456  
  11.457  	/* Register ourselves with the kernel device subsystem */
  11.458  	device_register(&xenbus_frontend.dev);
  11.459 -	device_register(&xenbus_backend.dev);
  11.460 +	xenbus_backend_device_register();
  11.461  
  11.462  	if (!is_initial_xendomain())
  11.463  		xenbus_probe(NULL);
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h	Wed Oct 25 12:12:01 2006 -0600
    12.3 @@ -0,0 +1,77 @@
    12.4 +/******************************************************************************
    12.5 + * xenbus_probe.h
    12.6 + *
    12.7 + * Talks to Xen Store to figure out what devices we have.
    12.8 + *
    12.9 + * Copyright (C) 2005 Rusty Russell, IBM Corporation
   12.10 + * Copyright (C) 2005 XenSource Ltd.
   12.11 + * 
   12.12 + * This program is free software; you can redistribute it and/or
   12.13 + * modify it under the terms of the GNU General Public License version 2
   12.14 + * as published by the Free Software Foundation; or, when distributed
   12.15 + * separately from the Linux kernel or incorporated into other
   12.16 + * software packages, subject to the following license:
   12.17 + * 
   12.18 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   12.19 + * of this source file (the "Software"), to deal in the Software without
   12.20 + * restriction, including without limitation the rights to use, copy, modify,
   12.21 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   12.22 + * and to permit persons to whom the Software is furnished to do so, subject to
   12.23 + * the following conditions:
   12.24 + * 
   12.25 + * The above copyright notice and this permission notice shall be included in
   12.26 + * all copies or substantial portions of the Software.
   12.27 + * 
   12.28 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   12.29 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   12.30 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   12.31 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   12.32 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   12.33 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   12.34 + * IN THE SOFTWARE.
   12.35 + */
   12.36 +
   12.37 +#ifndef _XENBUS_PROBE_H
   12.38 +#define _XENBUS_PROBE_H
   12.39 +
   12.40 +#ifdef CONFIG_XEN_BACKEND
   12.41 +extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
   12.42 +extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
   12.43 +extern void xenbus_backend_probe_and_watch(void);
   12.44 +extern void xenbus_backend_bus_register(void);
   12.45 +extern void xenbus_backend_device_register(void);
   12.46 +#else
   12.47 +static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
   12.48 +static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
   12.49 +static inline void xenbus_backend_probe_and_watch(void) {}
   12.50 +static inline void xenbus_backend_bus_register(void) {}
   12.51 +static inline void xenbus_backend_device_register(void) {}
   12.52 +#endif
   12.53 +
   12.54 +struct xen_bus_type
   12.55 +{
   12.56 +	char *root;
   12.57 +	unsigned int levels;
   12.58 +	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
   12.59 +	int (*probe)(const char *type, const char *dir);
   12.60 +	struct bus_type bus;
   12.61 +	struct device dev;
   12.62 +};
   12.63 +
   12.64 +extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
   12.65 +extern int xenbus_dev_probe(struct device *_dev);
   12.66 +extern int xenbus_dev_remove(struct device *_dev);
   12.67 +extern int xenbus_register_driver_common(struct xenbus_driver *drv,
   12.68 +					 struct xen_bus_type *bus);
   12.69 +extern int xenbus_probe_node(struct xen_bus_type *bus,
   12.70 +			     const char *type,
   12.71 +			     const char *nodename);
   12.72 +extern int xenbus_probe_devices(struct xen_bus_type *bus);
   12.73 +
   12.74 +extern void dev_changed(const char *node, struct xen_bus_type *bus);
   12.75 +
   12.76 +/* Simplified asprintf. Probably belongs in lib */
   12.77 +extern char *kasprintf(const char *fmt, ...);
   12.78 +
   12.79 +#endif
   12.80 +
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c	Wed Oct 25 12:12:01 2006 -0600
    13.3 @@ -0,0 +1,271 @@
    13.4 +/******************************************************************************
    13.5 + * Talks to Xen Store to figure out what devices we have (backend half).
    13.6 + *
    13.7 + * Copyright (C) 2005 Rusty Russell, IBM Corporation
    13.8 + * Copyright (C) 2005 Mike Wray, Hewlett-Packard
    13.9 + * Copyright (C) 2005, 2006 XenSource Ltd
   13.10 + * 
   13.11 + * This program is free software; you can redistribute it and/or
   13.12 + * modify it under the terms of the GNU General Public License version 2
   13.13 + * as published by the Free Software Foundation; or, when distributed
   13.14 + * separately from the Linux kernel or incorporated into other
   13.15 + * software packages, subject to the following license:
   13.16 + * 
   13.17 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   13.18 + * of this source file (the "Software"), to deal in the Software without
   13.19 + * restriction, including without limitation the rights to use, copy, modify,
   13.20 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   13.21 + * and to permit persons to whom the Software is furnished to do so, subject to
   13.22 + * the following conditions:
   13.23 + * 
   13.24 + * The above copyright notice and this permission notice shall be included in
   13.25 + * all copies or substantial portions of the Software.
   13.26 + * 
   13.27 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   13.28 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   13.29 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   13.30 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   13.31 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   13.32 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   13.33 + * IN THE SOFTWARE.
   13.34 + */
   13.35 +
   13.36 +#define DPRINTK(fmt, args...)				\
   13.37 +	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
   13.38 +		 __FUNCTION__, __LINE__, ##args)
   13.39 +
   13.40 +#include <linux/kernel.h>
   13.41 +#include <linux/err.h>
   13.42 +#include <linux/string.h>
   13.43 +#include <linux/ctype.h>
   13.44 +#include <linux/fcntl.h>
   13.45 +#include <linux/mm.h>
   13.46 +#include <linux/notifier.h>
   13.47 +#include <linux/kthread.h>
   13.48 +
   13.49 +#include <asm/io.h>
   13.50 +#include <asm/page.h>
   13.51 +#include <asm/maddr.h>
   13.52 +#include <asm/pgtable.h>
   13.53 +#include <asm/hypervisor.h>
   13.54 +#include <xen/xenbus.h>
   13.55 +#include <xen/xen_proc.h>
   13.56 +#include <xen/evtchn.h>
   13.57 +#include <xen/features.h>
   13.58 +#include <xen/hvm.h>
   13.59 +
   13.60 +#include "xenbus_comms.h"
   13.61 +#include "xenbus_probe.h"
   13.62 +
   13.63 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
   13.64 +#include <xen/platform-compat.h>
   13.65 +#endif
   13.66 +
   13.67 +static int xenbus_uevent_backend(struct device *dev, char **envp,
   13.68 +				 int num_envp, char *buffer, int buffer_size);
   13.69 +static int xenbus_probe_backend(const char *type, const char *domid);
   13.70 +
   13.71 +extern int read_otherend_details(struct xenbus_device *xendev,
   13.72 +				 char *id_node, char *path_node);
   13.73 +
   13.74 +static int read_frontend_details(struct xenbus_device *xendev)
   13.75 +{
   13.76 +	return read_otherend_details(xendev, "frontend-id", "frontend");
   13.77 +}
   13.78 +
   13.79 +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
   13.80 +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
   13.81 +{
   13.82 +	int domid, err;
   13.83 +	const char *devid, *type, *frontend;
   13.84 +	unsigned int typelen;
   13.85 +
   13.86 +	type = strchr(nodename, '/');
   13.87 +	if (!type)
   13.88 +		return -EINVAL;
   13.89 +	type++;
   13.90 +	typelen = strcspn(type, "/");
   13.91 +	if (!typelen || type[typelen] != '/')
   13.92 +		return -EINVAL;
   13.93 +
   13.94 +	devid = strrchr(nodename, '/') + 1;
   13.95 +
   13.96 +	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
   13.97 +			    "frontend", NULL, &frontend,
   13.98 +			    NULL);
   13.99 +	if (err)
  13.100 +		return err;
  13.101 +	if (strlen(frontend) == 0)
  13.102 +		err = -ERANGE;
  13.103 +	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
  13.104 +		err = -ENOENT;
  13.105 +	kfree(frontend);
  13.106 +
  13.107 +	if (err)
  13.108 +		return err;
  13.109 +
  13.110 +	if (snprintf(bus_id, BUS_ID_SIZE,
  13.111 +		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
  13.112 +		return -ENOSPC;
  13.113 +	return 0;
  13.114 +}
  13.115 +
  13.116 +static struct xen_bus_type xenbus_backend = {
  13.117 +	.root = "backend",
  13.118 +	.levels = 3, 		/* backend/type/<frontend>/<id> */
  13.119 +	.get_bus_id = backend_bus_id,
  13.120 +	.probe = xenbus_probe_backend,
  13.121 +	.bus = {
  13.122 +		.name     = "xen-backend",
  13.123 +		.match    = xenbus_match,
  13.124 +		.probe    = xenbus_dev_probe,
  13.125 +		.remove   = xenbus_dev_remove,
  13.126 +//		.shutdown = xenbus_dev_shutdown,
  13.127 +		.uevent   = xenbus_uevent_backend,
  13.128 +	},
  13.129 +	.dev = {
  13.130 +		.bus_id = "xen-backend",
  13.131 +	},
  13.132 +};
  13.133 +
  13.134 +static int xenbus_uevent_backend(struct device *dev, char **envp,
  13.135 +				 int num_envp, char *buffer, int buffer_size)
  13.136 +{
  13.137 +	struct xenbus_device *xdev;
  13.138 +	struct xenbus_driver *drv;
  13.139 +	int i = 0;
  13.140 +	int length = 0;
  13.141 +
  13.142 +	DPRINTK("");
  13.143 +
  13.144 +	if (dev == NULL)
  13.145 +		return -ENODEV;
  13.146 +
  13.147 +	xdev = to_xenbus_device(dev);
  13.148 +	if (xdev == NULL)
  13.149 +		return -ENODEV;
  13.150 +
  13.151 +	/* stuff we want to pass to /sbin/hotplug */
  13.152 +	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  13.153 +		       "XENBUS_TYPE=%s", xdev->devicetype);
  13.154 +
  13.155 +	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  13.156 +		       "XENBUS_PATH=%s", xdev->nodename);
  13.157 +
  13.158 +	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
  13.159 +		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
  13.160 +
  13.161 +	/* terminate, set to next free slot, shrink available space */
  13.162 +	envp[i] = NULL;
  13.163 +	envp = &envp[i];
  13.164 +	num_envp -= i;
  13.165 +	buffer = &buffer[length];
  13.166 +	buffer_size -= length;
  13.167 +
  13.168 +	if (dev->driver) {
  13.169 +		drv = to_xenbus_driver(dev->driver);
  13.170 +		if (drv && drv->uevent)
  13.171 +			return drv->uevent(xdev, envp, num_envp, buffer,
  13.172 +					   buffer_size);
  13.173 +	}
  13.174 +
  13.175 +	return 0;
  13.176 +}
  13.177 +
  13.178 +int xenbus_register_backend(struct xenbus_driver *drv)
  13.179 +{
  13.180 +	drv->read_otherend_details = read_frontend_details;
  13.181 +
  13.182 +	return xenbus_register_driver_common(drv, &xenbus_backend);
  13.183 +}
  13.184 +EXPORT_SYMBOL_GPL(xenbus_register_backend);
  13.185 +
  13.186 +/* backend/<typename>/<frontend-uuid>/<name> */
  13.187 +static int xenbus_probe_backend_unit(const char *dir,
  13.188 +				     const char *type,
  13.189 +				     const char *name)
  13.190 +{
  13.191 +	char *nodename;
  13.192 +	int err;
  13.193 +
  13.194 +	nodename = kasprintf("%s/%s", dir, name);
  13.195 +	if (!nodename)
  13.196 +		return -ENOMEM;
  13.197 +
  13.198 +	DPRINTK("%s\n", nodename);
  13.199 +
  13.200 +	err = xenbus_probe_node(&xenbus_backend, type, nodename);
  13.201 +	kfree(nodename);
  13.202 +	return err;
  13.203 +}
  13.204 +
  13.205 +/* backend/<typename>/<frontend-domid> */
  13.206 +static int xenbus_probe_backend(const char *type, const char *domid)
  13.207 +{
  13.208 +	char *nodename;
  13.209 +	int err = 0;
  13.210 +	char **dir;
  13.211 +	unsigned int i, dir_n = 0;
  13.212 +
  13.213 +	DPRINTK("");
  13.214 +
  13.215 +	nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
  13.216 +	if (!nodename)
  13.217 +		return -ENOMEM;
  13.218 +
  13.219 +	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
  13.220 +	if (IS_ERR(dir)) {
  13.221 +		kfree(nodename);
  13.222 +		return PTR_ERR(dir);
  13.223 +	}
  13.224 +
  13.225 +	for (i = 0; i < dir_n; i++) {
  13.226 +		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
  13.227 +		if (err)
  13.228 +			break;
  13.229 +	}
  13.230 +	kfree(dir);
  13.231 +	kfree(nodename);
  13.232 +	return err;
  13.233 +}
  13.234 +
  13.235 +static void backend_changed(struct xenbus_watch *watch,
  13.236 +			    const char **vec, unsigned int len)
  13.237 +{
  13.238 +	DPRINTK("");
  13.239 +
  13.240 +	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
  13.241 +}
  13.242 +
  13.243 +static struct xenbus_watch be_watch = {
  13.244 +	.node = "backend",
  13.245 +	.callback = backend_changed,
  13.246 +};
  13.247 +
  13.248 +void xenbus_backend_suspend(int (*fn)(struct device *, void *))
  13.249 +{
  13.250 +	DPRINTK("");
  13.251 +	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
  13.252 +}
  13.253 +
  13.254 +void xenbus_backend_resume(int (*fn)(struct device *, void *))
  13.255 +{
  13.256 +	DPRINTK("");
  13.257 +	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
  13.258 +}
  13.259 +
  13.260 +void xenbus_backend_probe_and_watch(void)
  13.261 +{
  13.262 +	xenbus_probe_devices(&xenbus_backend);
  13.263 +	register_xenbus_watch(&be_watch);
  13.264 +}
  13.265 +
  13.266 +void xenbus_backend_bus_register(void)
  13.267 +{
  13.268 +	bus_register(&xenbus_backend.bus);
  13.269 +}
  13.270 +
  13.271 +void xenbus_backend_device_register(void)
  13.272 +{
  13.273 +	device_register(&xenbus_backend.dev);
  13.274 +}
    14.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Tue Oct 24 11:21:48 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Wed Oct 25 12:12:01 2006 -0600
    14.3 @@ -42,9 +42,15 @@
    14.4  #include <linux/fcntl.h>
    14.5  #include <linux/kthread.h>
    14.6  #include <linux/rwsem.h>
    14.7 +#include <linux/module.h>
    14.8 +#include <linux/mutex.h>
    14.9  #include <xen/xenbus.h>
   14.10  #include "xenbus_comms.h"
   14.11  
   14.12 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
   14.13 +#include <xen/platform-compat.h>
   14.14 +#endif
   14.15 +
   14.16  /* xenbus_probe.c */
   14.17  extern char *kasprintf(const char *fmt, ...);
   14.18  
    15.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h	Tue Oct 24 11:21:48 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h	Wed Oct 25 12:12:01 2006 -0600
    15.3 @@ -9,6 +9,10 @@
    15.4  
    15.5  #include <linux/config.h>
    15.6  
    15.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
    15.8 +#include <xen/platform-compat.h>
    15.9 +#endif
   15.10 +
   15.11  #define ADDR (*(volatile long *) addr)
   15.12  
   15.13  static __inline__ void synch_set_bit(int nr, volatile void * addr)
    16.1 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h	Tue Oct 24 11:21:48 2006 -0600
    16.2 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h	Wed Oct 25 12:12:01 2006 -0600
    16.3 @@ -38,6 +38,7 @@
    16.4  #include <linux/notifier.h>
    16.5  #include <linux/mutex.h>
    16.6  #include <linux/completion.h>
    16.7 +#include <linux/init.h>
    16.8  #include <xen/interface/xen.h>
    16.9  #include <xen/interface/grant_table.h>
   16.10  #include <xen/interface/io/xenbus.h>
    17.1 --- a/tools/blktap/drivers/blktapctrl.c	Tue Oct 24 11:21:48 2006 -0600
    17.2 +++ b/tools/blktap/drivers/blktapctrl.c	Wed Oct 25 12:12:01 2006 -0600
    17.3 @@ -204,81 +204,49 @@ static blkif_t *test_path(char *path, ch
    17.4  
    17.5  static void add_disktype(blkif_t *blkif, int type)
    17.6  {
    17.7 -	driver_list_entry_t *entry, *ptr, *last;
    17.8 +	driver_list_entry_t *entry, **pprev;
    17.9  
   17.10 -	if (type > MAX_DISK_TYPES) return;
   17.11 +	if (type > MAX_DISK_TYPES)
   17.12 +		return;
   17.13  
   17.14  	entry = malloc(sizeof(driver_list_entry_t));
   17.15  	entry->blkif = blkif;
   17.16 -	entry->next = NULL;
   17.17 -	ptr = active_disks[type];
   17.18 -
   17.19 -	if (ptr == NULL) {
   17.20 -		active_disks[type] = entry;
   17.21 -		entry->prev = NULL;
   17.22 -		return;
   17.23 -	}
   17.24 +	entry->next  = NULL;
   17.25  
   17.26 -	while (ptr != NULL) {
   17.27 -		last = ptr;
   17.28 -		ptr = ptr->next;
   17.29 -	}
   17.30 +	pprev = &active_disks[type];
   17.31 +	while (*pprev != NULL)
   17.32 +		pprev = &(*pprev)->next;
   17.33  
   17.34 -	/*We've found the end of the list*/
   17.35 -        last->next = entry;
   17.36 -	entry->prev = last;
   17.37 -	
   17.38 -	return;
   17.39 +	*pprev = entry;
   17.40 +	entry->pprev = pprev;
   17.41  }
   17.42  
   17.43  static int del_disktype(blkif_t *blkif)
   17.44  {
   17.45 -	driver_list_entry_t *ptr, *cur, *last;
   17.46 +	driver_list_entry_t *entry, **pprev;
   17.47  	int type = blkif->drivertype, count = 0, close = 0;
   17.48  
   17.49 -	if (type > MAX_DISK_TYPES) return 1;
   17.50 +	if (type > MAX_DISK_TYPES)
   17.51 +		return 1;
   17.52  
   17.53 -	ptr = active_disks[type];
   17.54 -	last = NULL;
   17.55 -	while (ptr != NULL) {
   17.56 -		count++;
   17.57 -		if (blkif == ptr->blkif) {
   17.58 -			cur = ptr;
   17.59 -			if (ptr->next != NULL) {
   17.60 -				/*There's more later in the chain*/
   17.61 -				if (!last) {
   17.62 -					/*We're first in the list*/
   17.63 -					active_disks[type] = ptr->next;
   17.64 -					ptr = ptr->next;
   17.65 -					ptr->prev = NULL;
   17.66 -				}
   17.67 -				else {
   17.68 -					/*We're sandwiched*/
   17.69 -					last->next = ptr->next;
   17.70 -					ptr = ptr->next;
   17.71 -					ptr->prev = last;
   17.72 -				}
   17.73 -				
   17.74 -			} else if (last) {
   17.75 -				/*There's more earlier in the chain*/
   17.76 -				last->next = NULL;
   17.77 -			} else {
   17.78 -				/*We're the only entry*/
   17.79 -				active_disks[type] = NULL;
   17.80 -				if(dtypes[type]->single_handler == 1) 
   17.81 -					close = 1;
   17.82 -			}
   17.83 -			DPRINTF("DEL_DISKTYPE: Freeing entry\n");
   17.84 -			free(cur);
   17.85 -			if (dtypes[type]->single_handler == 0) close = 1;
   17.86 +	pprev = &active_disks[type];
   17.87 +	while ((*pprev != NULL) && ((*pprev)->blkif != blkif))
   17.88 +		pprev = &(*pprev)->next;
   17.89  
   17.90 -			return close;
   17.91 -		}
   17.92 -		last = ptr;
   17.93 -		ptr = ptr->next;
   17.94 +	if ((entry = *pprev) == NULL) {
   17.95 +		DPRINTF("DEL_DISKTYPE: No match\n");
   17.96 +		return 1;
   17.97  	}
   17.98 -	DPRINTF("DEL_DISKTYPE: No match\n");
   17.99 -	return 1;
  17.100 +
  17.101 +	*pprev = entry->next;
  17.102 +	if (entry->next)
  17.103 +		entry->next->pprev = pprev;
  17.104 +
  17.105 +	DPRINTF("DEL_DISKTYPE: Freeing entry\n");
  17.106 +	free(entry);
  17.107 +
  17.108 +	/* Caller should close() if no single controller, or list is empty. */
  17.109 +	return (!dtypes[type]->single_handler || (active_disks[type] == NULL));
  17.110  }
  17.111  
  17.112  static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
  17.113 @@ -592,8 +560,8 @@ int unmap_blktapctrl(blkif_t *blkif)
  17.114  	if (del_disktype(blkif)) {
  17.115  		close(blkif->fds[WRITE]);
  17.116  		close(blkif->fds[READ]);
  17.117 +	}
  17.118  
  17.119 -	}
  17.120  	return 0;
  17.121  }
  17.122  
    18.1 --- a/tools/blktap/drivers/tapdisk.c	Tue Oct 24 11:21:48 2006 -0600
    18.2 +++ b/tools/blktap/drivers/tapdisk.c	Wed Oct 25 12:12:01 2006 -0600
    18.3 @@ -79,31 +79,17 @@ static void unmap_disk(struct td_state *
    18.4  {
    18.5  	tapdev_info_t *info = s->ring_info;
    18.6  	struct tap_disk *drv = s->drv;
    18.7 -	fd_list_entry_t *ptr, *prev;
    18.8 +	fd_list_entry_t *entry;
    18.9  
   18.10  	drv->td_close(s);
   18.11  
   18.12  	if (info != NULL && info->mem > 0)
   18.13  	        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
   18.14  
   18.15 -	ptr = s->fd_entry;
   18.16 -	prev = ptr->prev;
   18.17 -
   18.18 -	if (prev) {
   18.19 -		/*There are entries earlier in the list*/
   18.20 -		prev->next = ptr->next;
   18.21 -		if (ptr->next) {
   18.22 -			ptr = ptr->next;
   18.23 -			ptr->prev = prev;
   18.24 -		}
   18.25 -	} else {
   18.26 -		/*We are the first entry in list*/
   18.27 -		if (ptr->next) {
   18.28 -			ptr = ptr->next;
   18.29 -			fd_start = ptr;
   18.30 -			ptr->prev = NULL;
   18.31 -		} else fd_start = NULL;
   18.32 -	}
   18.33 +	entry = s->fd_entry;
   18.34 +	*entry->pprev = entry->next;
   18.35 +	if (entry->next)
   18.36 +		entry->next->pprev = entry->pprev;
   18.37  
   18.38  	close(info->fd);
   18.39  
   18.40 @@ -144,35 +130,29 @@ static inline int LOCAL_FD_SET(fd_set *r
   18.41  	return 0;
   18.42  }
   18.43  
   18.44 -static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
   18.45 +static inline fd_list_entry_t *add_fd_entry(
   18.46 +	int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
   18.47  {
   18.48 -	fd_list_entry_t *ptr, *last, *entry;
   18.49 +	fd_list_entry_t **pprev, *entry;
   18.50  	int i;
   18.51 +
   18.52  	DPRINTF("Adding fd_list_entry\n");
   18.53  
   18.54  	/*Add to linked list*/
   18.55  	s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
   18.56  	entry->tap_fd = tap_fd;
   18.57 -	for (i = 0; i < MAX_IOFD; i++) entry->io_fd[i] = io_fd[i];
   18.58 +	for (i = 0; i < MAX_IOFD; i++)
   18.59 +		entry->io_fd[i] = io_fd[i];
   18.60  	entry->s = s;
   18.61  	entry->next = NULL;
   18.62  
   18.63 -	ptr = fd_start;
   18.64 -	if (ptr == NULL) {
   18.65 -		/*We are the first entry*/
   18.66 -		fd_start = entry;
   18.67 -		entry->prev = NULL;
   18.68 -		goto finish;
   18.69 -	}
   18.70 +	pprev = &fd_start;
   18.71 +	while (*pprev != NULL)
   18.72 +		pprev = &(*pprev)->next;
   18.73  
   18.74 -	while (ptr != NULL) {
   18.75 -		last = ptr;
   18.76 -		ptr = ptr->next;
   18.77 -	}
   18.78 -	last->next = entry;
   18.79 -	entry->prev = last;
   18.80 +	*pprev = entry;
   18.81 +	entry->pprev = pprev;
   18.82  
   18.83 - finish:
   18.84  	return entry;
   18.85  }
   18.86  
    19.1 --- a/tools/blktap/drivers/tapdisk.h	Tue Oct 24 11:21:48 2006 -0600
    19.2 +++ b/tools/blktap/drivers/tapdisk.h	Wed Oct 25 12:12:01 2006 -0600
    19.3 @@ -191,9 +191,8 @@ static disk_info_t *dtypes[] = {
    19.4  };
    19.5  
    19.6  typedef struct driver_list_entry {
    19.7 -	void *blkif;
    19.8 -	void *prev;
    19.9 -	void *next;
   19.10 +	struct blkif *blkif;
   19.11 +	struct driver_list_entry **pprev, *next;
   19.12  } driver_list_entry_t;
   19.13  
   19.14  typedef struct fd_list_entry {
   19.15 @@ -201,8 +200,7 @@ typedef struct fd_list_entry {
   19.16  	int  tap_fd;
   19.17  	int  io_fd[MAX_IOFD];
   19.18  	struct td_state *s;
   19.19 -	void *prev;
   19.20 -	void *next;
   19.21 +	struct fd_list_entry **pprev, *next;
   19.22  } fd_list_entry_t;
   19.23  
   19.24  int qcow_create(const char *filename, uint64_t total_size,
    20.1 --- a/tools/firmware/acpi/acpi_fadt.h	Tue Oct 24 11:21:48 2006 -0600
    20.2 +++ b/tools/firmware/acpi/acpi_fadt.h	Wed Oct 25 12:12:01 2006 -0600
    20.3 @@ -18,6 +18,8 @@
    20.4  #ifndef _FADT_H_
    20.5  #define _FADT_H_
    20.6  
    20.7 +#include <xen/hvm/ioreq.h>
    20.8 +
    20.9  //
   20.10  // FADT Definitions, see ACPI 2.0 specification for details.
   20.11  //
   20.12 @@ -51,7 +53,9 @@
   20.13  //
   20.14  // Fixed Feature Flags
   20.15  // 
   20.16 -#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
   20.17 +#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1 | ACPI_SLP_BUTTON | \
   20.18 +                                  ACPI_WBINVD | ACPI_PWR_BUTTON | \
   20.19 +                                  ACPI_FIX_RTC | ACPI_TMR_VAL_EXT)
   20.20  
   20.21  //
   20.22  // PM1A Event Register Block Generic Address Information
   20.23 @@ -59,7 +63,6 @@
   20.24  #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
   20.25  #define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x20
   20.26  #define ACPI_PM1A_EVT_BLK_BIT_OFFSET        0x00
   20.27 -#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
   20.28  
   20.29  //
   20.30  // PM1B Event Register Block Generic Address Information
   20.31 @@ -75,7 +78,6 @@
   20.32  #define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
   20.33  #define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x10
   20.34  #define ACPI_PM1A_CNT_BLK_BIT_OFFSET        0x00
   20.35 -#define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
   20.36  
   20.37  //
   20.38  // PM1B Control Register Block Generic Address Information
   20.39 @@ -100,7 +102,6 @@
   20.40  #define ACPI_PM_TMR_BLK_ADDRESS_SPACE_ID    ACPI_SYSTEM_IO
   20.41  #define ACPI_PM_TMR_BLK_BIT_WIDTH           0x20
   20.42  #define ACPI_PM_TMR_BLK_BIT_OFFSET          0x00
   20.43 -#define ACPI_PM_TMR_BLK_ADDRESS             (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
   20.44  
   20.45  //
   20.46  // General Purpose Event 0 Register Block Generic Address
    21.1 --- a/tools/ioemu/vl.c	Tue Oct 24 11:21:48 2006 -0600
    21.2 +++ b/tools/ioemu/vl.c	Wed Oct 25 12:12:01 2006 -0600
    21.3 @@ -6448,7 +6448,6 @@ int main(int argc, char **argv)
    21.4      fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
    21.5              shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
    21.6  
    21.7 -    /* not yet add for IA64 */
    21.8      buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
    21.9                                              PROT_READ|PROT_WRITE,
   21.10                                              page_array[shared_page_nr - 2]);
   21.11 @@ -6465,7 +6464,7 @@ int main(int argc, char **argv)
   21.12  #elif defined(__ia64__)
   21.13    
   21.14      if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
   21.15 -                             IO_PAGE_START >> PAGE_SHIFT, 1) != 1) {
   21.16 +                             IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
   21.17          fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
   21.18          exit(-1);
   21.19      }
   21.20 @@ -6477,6 +6476,12 @@ int main(int argc, char **argv)
   21.21      fprintf(logfile, "shared page at pfn:%lx, mfn: %016lx\n",
   21.22              IO_PAGE_START >> PAGE_SHIFT, page_array[0]);
   21.23  
   21.24 +    buffered_io_page =xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   21.25 +                                       PROT_READ|PROT_WRITE,
   21.26 +                                       page_array[2]);
   21.27 +    fprintf(logfile, "Buffered IO page at pfn:%lx, mfn: %016lx\n",
   21.28 +            BUFFER_IO_PAGE_START >> PAGE_SHIFT, page_array[2]);
   21.29 +
   21.30      if (xc_ia64_get_pfn_list(xc_handle, domid,
   21.31                               page_array, 0, nr_pages) != nr_pages) {
   21.32          fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
   21.33 @@ -6496,6 +6501,7 @@ int main(int argc, char **argv)
   21.34          fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
   21.35          exit(-1);
   21.36      }
   21.37 +    free(page_array);
   21.38  #endif
   21.39  #else  /* !CONFIG_DM */
   21.40  
    22.1 --- a/tools/ioemu/vnc.c	Tue Oct 24 11:21:48 2006 -0600
    22.2 +++ b/tools/ioemu/vnc.c	Wed Oct 25 12:12:01 2006 -0600
    22.3 @@ -203,6 +203,8 @@ static void set_bits_in_row(VncState *vs
    22.4  	mask = ~(0ULL);
    22.5  
    22.6      h += y;
    22.7 +    if (h > vs->ds->height)
    22.8 +        h = vs->ds->height;
    22.9      for (; y < h; y++)
   22.10  	row[y] |= mask;
   22.11  }
    23.1 --- a/tools/libxc/Makefile	Tue Oct 24 11:21:48 2006 -0600
    23.2 +++ b/tools/libxc/Makefile	Wed Oct 25 12:12:01 2006 -0600
    23.3 @@ -31,7 +31,7 @@ GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build
    23.4  
    23.5  -include $(XEN_TARGET_ARCH)/Makefile
    23.6  
    23.7 -CFLAGS   += -Werror
    23.8 +CFLAGS   += -Werror -Wmissing-prototypes
    23.9  CFLAGS   += -fno-strict-aliasing
   23.10  CFLAGS   += $(INCLUDES) -I.
   23.11  
    24.1 --- a/tools/libxc/ia64/xc_ia64_hvm_build.c	Tue Oct 24 11:21:48 2006 -0600
    24.2 +++ b/tools/libxc/ia64/xc_ia64_hvm_build.c	Wed Oct 25 12:12:01 2006 -0600
    24.3 @@ -551,8 +551,9 @@ setup_guest(int xc_handle, uint32_t dom,
    24.4              char *image, unsigned long image_size, uint32_t vcpus,
    24.5              unsigned int store_evtchn, unsigned long *store_mfn)
    24.6  {
    24.7 -    unsigned long page_array[2];
    24.8 +    unsigned long page_array[3];
    24.9      shared_iopage_t *sp;
   24.10 +    void *ioreq_buffer_page;
   24.11      unsigned long dom_memsize = (memsize << 20);
   24.12      DECLARE_DOMCTL;
   24.13  
   24.14 @@ -587,7 +588,7 @@ setup_guest(int xc_handle, uint32_t dom,
   24.15  
   24.16      /* Retrieve special pages like io, xenstore, etc. */
   24.17      if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
   24.18 -                             IO_PAGE_START>>PAGE_SHIFT, 2) != 2) {
   24.19 +                             IO_PAGE_START>>PAGE_SHIFT, 3) != 3) {
   24.20          PERROR("Could not get the page frame list");
   24.21          goto error_out;
   24.22      }
   24.23 @@ -604,7 +605,10 @@ setup_guest(int xc_handle, uint32_t dom,
   24.24  
   24.25      memset(sp, 0, PAGE_SIZE);
   24.26      munmap(sp, PAGE_SIZE);
   24.27 -
   24.28 +    ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom,
   24.29 +                               PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[2]); 
   24.30 +    memset(ioreq_buffer_page,0,PAGE_SIZE);
   24.31 +    munmap(ioreq_buffer_page, PAGE_SIZE);
   24.32      return 0;
   24.33  
   24.34  error_out:
    25.1 --- a/tools/libxc/xc_linux_build.c	Tue Oct 24 11:21:48 2006 -0600
    25.2 +++ b/tools/libxc/xc_linux_build.c	Wed Oct 25 12:12:01 2006 -0600
    25.3 @@ -128,7 +128,7 @@ static int probeimageformat(const char *
    25.4      return 0;
    25.5  }
    25.6  
    25.7 -int load_initrd(int xc_handle, domid_t dom,
    25.8 +static int load_initrd(int xc_handle, domid_t dom,
    25.9                  struct initrd_info *initrd,
   25.10                  unsigned long physbase,
   25.11                  xen_pfn_t *phys_to_mach)
    26.1 --- a/tools/libxc/xc_linux_restore.c	Tue Oct 24 11:21:48 2006 -0600
    26.2 +++ b/tools/libxc/xc_linux_restore.c	Wed Oct 25 12:12:01 2006 -0600
    26.3 @@ -57,7 +57,7 @@ read_exact(int fd, void *buf, size_t cou
    26.4  ** This function inverts that operation, replacing the pfn values with
    26.5  ** the (now known) appropriate mfn values.
    26.6  */
    26.7 -int uncanonicalize_pagetable(unsigned long type, void *page)
    26.8 +static int uncanonicalize_pagetable(unsigned long type, void *page)
    26.9  {
   26.10      int i, pte_last;
   26.11      unsigned long pfn;
    27.1 --- a/tools/libxc/xc_linux_save.c	Tue Oct 24 11:21:48 2006 -0600
    27.2 +++ b/tools/libxc/xc_linux_save.c	Wed Oct 25 12:12:01 2006 -0600
    27.3 @@ -413,7 +413,7 @@ static int suspend_and_state(int (*suspe
    27.4  ** which entries do not require canonicalization (in particular, those
    27.5  ** entries which map the virtual address reserved for the hypervisor).
    27.6  */
    27.7 -int canonicalize_pagetable(unsigned long type, unsigned long pfn,
    27.8 +static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
    27.9                             const void *spage, void *dpage)
   27.10  {
   27.11  
    28.1 --- a/tools/libxc/xc_private.c	Tue Oct 24 11:21:48 2006 -0600
    28.2 +++ b/tools/libxc/xc_private.c	Wed Oct 25 12:12:01 2006 -0600
    28.3 @@ -6,6 +6,7 @@
    28.4  
    28.5  #include <inttypes.h>
    28.6  #include "xc_private.h"
    28.7 +#include "xg_private.h"
    28.8  
    28.9  int lock_pages(void *addr, size_t len)
   28.10  {
   28.11 @@ -35,23 +36,6 @@ int xc_get_pfn_type_batch(int xc_handle,
   28.12      return do_domctl(xc_handle, &domctl);
   28.13  }
   28.14  
   28.15 -#define GETPFN_ERR (~0U)
   28.16 -unsigned int get_pfn_type(int xc_handle,
   28.17 -                          unsigned long mfn,
   28.18 -                          uint32_t dom)
   28.19 -{
   28.20 -    DECLARE_DOMCTL;
   28.21 -    domctl.cmd = XEN_DOMCTL_getpageframeinfo;
   28.22 -    domctl.u.getpageframeinfo.gmfn   = mfn;
   28.23 -    domctl.domain = (domid_t)dom;
   28.24 -    if ( do_domctl(xc_handle, &domctl) < 0 )
   28.25 -    {
   28.26 -        PERROR("Unexpected failure when getting page frame info!");
   28.27 -        return GETPFN_ERR;
   28.28 -    }
   28.29 -    return domctl.u.getpageframeinfo.type;
   28.30 -}
   28.31 -
   28.32  int xc_mmuext_op(
   28.33      int xc_handle,
   28.34      struct mmuext_op *op,
    29.1 --- a/tools/python/xen/xend/image.py	Tue Oct 24 11:21:48 2006 -0600
    29.2 +++ b/tools/python/xen/xend/image.py	Wed Oct 25 12:12:01 2006 -0600
    29.3 @@ -471,7 +471,7 @@ class IA64_HVM_ImageHandler(HVMImageHand
    29.4      def getRequiredAvailableMemory(self, mem_kb):
    29.5          page_kb = 16
    29.6          # ROM size for guest firmware, ioreq page and xenstore page
    29.7 -        extra_pages = 1024 + 2
    29.8 +        extra_pages = 1024 + 3
    29.9          return mem_kb + extra_pages * page_kb
   29.10  
   29.11      def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
   29.12 @@ -500,9 +500,12 @@ class X86_HVM_ImageHandler(HVMImageHandl
   29.13          # overhead due to getRequiredInitialReservation.
   29.14          maxmem_kb = self.getRequiredInitialReservation(maxmem_kb)
   29.15  
   29.16 -        # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
   29.17 -        # the minimum that Xen would allocate if no value were given.
   29.18 -        return max(1024 * self.vm.getVCpuCount() + maxmem_kb / 256,
   29.19 +        # 256 pages (1MB) per vcpu,
   29.20 +        # plus 1 page per MiB of RAM for the P2M map,
   29.21 +        # plus 1 page per MiB of RAM to shadow the resident processes.  
   29.22 +        # This is higher than the minimum that Xen would allocate if no value 
   29.23 +        # were given (but the Xen minimum is for safety, not performance).
   29.24 +        return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
   29.25                     shadow_mem_kb)
   29.26  
   29.27  
    30.1 --- a/tools/python/xen/xend/server/SrvDaemon.py	Tue Oct 24 11:21:48 2006 -0600
    30.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py	Wed Oct 25 12:12:01 2006 -0600
    30.3 @@ -9,6 +9,7 @@ import os
    30.4  import signal
    30.5  import sys
    30.6  import threading
    30.7 +import time
    30.8  import linecache
    30.9  import pwd
   30.10  import re
   30.11 @@ -106,12 +107,14 @@ class Daemon:
   30.12          os.close(2)
   30.13          if XEND_DEBUG:
   30.14              os.open('/dev/null', os.O_RDONLY)
   30.15 -            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
   30.16 +            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
   30.17              os.dup(1)
   30.18          else:
   30.19              os.open('/dev/null', os.O_RDWR)
   30.20              os.dup(0)
   30.21 -            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
   30.22 +            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
   30.23 +        print >>sys.stderr, ("Xend started at %s." %
   30.24 +                             time.asctime(time.localtime()))
   30.25  
   30.26          
   30.27      def start(self, trace=0):
    31.1 --- a/tools/xenstat/xentop/xentop.1	Tue Oct 24 11:21:48 2006 -0600
    31.2 +++ b/tools/xenstat/xentop/xentop.1	Wed Oct 25 12:12:01 2006 -0600
    31.3 @@ -47,6 +47,9 @@ seconds between updates (default 3)
    31.4  \fB\-n\fR, \fB\-\-networks\fR
    31.5  output network information
    31.6  .TP
    31.7 +\fB\-x\fR, \fB\-\-vbds\fR
    31.8 +output vbd block device data
    31.9 +.TP
   31.10  \fB\-r\fR, \fB\-\-repeat\-header\fR
   31.11  repeat table header before each domain
   31.12  .TP
    32.1 --- a/tools/xenstat/xentop/xentop.c	Tue Oct 24 11:21:48 2006 -0600
    32.2 +++ b/tools/xenstat/xentop/xentop.c	Wed Oct 25 12:12:01 2006 -0600
    32.3 @@ -204,7 +204,7 @@ static void usage(const char *program)
    32.4  	       "-V, --version        output version information and exit\n"
    32.5  	       "-d, --delay=SECONDS  seconds between updates (default 3)\n"
    32.6  	       "-n, --networks       output vif network data\n"
    32.7 -	       "-b, --vbds           output vbd block device data\n"
    32.8 +	       "-x, --vbds           output vbd block device data\n"
    32.9  	       "-r, --repeat-header  repeat table header before each domain\n"
   32.10  	       "-v, --vcpus          output vcpu data\n"
   32.11  	       "-b, --batch	     output in batch mode, no user input accepted\n"
   32.12 @@ -976,7 +976,7 @@ int main(int argc, char **argv)
   32.13  		{ "help",          no_argument,       NULL, 'h' },
   32.14  		{ "version",       no_argument,       NULL, 'V' },
   32.15  		{ "networks",      no_argument,       NULL, 'n' },
   32.16 - 		{ "vbds",          no_argument,       NULL, 'x' },
   32.17 +		{ "vbds",          no_argument,       NULL, 'x' },
   32.18  		{ "repeat-header", no_argument,       NULL, 'r' },
   32.19  		{ "vcpus",         no_argument,       NULL, 'v' },
   32.20  		{ "delay",         required_argument, NULL, 'd' },
   32.21 @@ -1065,7 +1065,7 @@ int main(int argc, char **argv)
   32.22  					break;
   32.23  			} while (1);
   32.24  	}
   32.25 -	
   32.26 +
   32.27  	/* Cleanup occurs in cleanup(), so no work to do here. */
   32.28  
   32.29  	return 0;
    33.1 --- a/tools/xm-test/lib/XmTestLib/arch.py	Tue Oct 24 11:21:48 2006 -0600
    33.2 +++ b/tools/xm-test/lib/XmTestLib/arch.py	Wed Oct 25 12:12:01 2006 -0600
    33.3 @@ -124,6 +124,7 @@ ppc_ParavirtDefaults = {"memory"  : 64,
    33.4      "i486"  : "x86",
    33.5      "i586"  : "x86",
    33.6      "i686"  : "x86",
    33.7 +    "x86_64": "x86_64",
    33.8      "ia64"  : "ia64",
    33.9      "ppc"   : "powerpc",
   33.10      "ppc64" : "powerpc",
   33.11 @@ -131,7 +132,7 @@ ppc_ParavirtDefaults = {"memory"  : 64,
   33.12  
   33.13  # Lookup current platform.
   33.14  _arch = _uname_to_arch_map.get(os.uname()[4], "Unknown")
   33.15 -if _arch == "x86" or _arch == "ia64":
   33.16 +if _arch == "x86" or _arch == "x86_64" or _arch == "ia64":
   33.17      minSafeMem = ia_minSafeMem
   33.18      getDefaultKernel = ia_getDefaultKernel
   33.19      checkBuffer = ia_checkBuffer
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/unmodified_drivers/linux-2.6/blkfront/Makefile	Wed Oct 25 12:12:01 2006 -0600
    34.3 @@ -0,0 +1,3 @@
    34.4 +ifneq ($(KERNELRELEASE),)
    34.5 +include $(src)/Kbuild
    34.6 +endif
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h	Wed Oct 25 12:12:01 2006 -0600
    35.3 @@ -0,0 +1,14 @@
    35.4 +#ifndef _PGTABLE_NOPMD_H
    35.5 +#define _PGTABLE_NOPMD_H
    35.6 +
    35.7 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
    35.8 +#error "This version of Linux should not need compat pgtable-nopmd.h"
    35.9 +#endif
   35.10 +
   35.11 +#define pud_t             pgd_t
   35.12 +#define pud_offset(d, va)     d
   35.13 +#define pud_none(pud)         0
   35.14 +#define pud_present(pud)      1
   35.15 +#define PTRS_PER_PUD          1
   35.16 +
   35.17 +#endif /* _PGTABLE_NOPMD_H */
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h	Wed Oct 25 12:12:01 2006 -0600
    36.3 @@ -0,0 +1,14 @@
    36.4 +#ifndef _PGTABLE_NOPUD_H
    36.5 +#define _PGTABLE_NOPUD_H
    36.6 +
    36.7 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
    36.8 +#error "This version of Linux should not need compat pgtable-nopud.h"
    36.9 +#endif
   36.10 +
   36.11 +#define pud_t             pgd_t
   36.12 +#define pud_offset(d, va)     d
   36.13 +#define pud_none(pud)         0
   36.14 +#define pud_present(pud)      1
   36.15 +#define PTRS_PER_PUD          1
   36.16 +
   36.17 +#endif /* _PGTABLE_NOPUD_H */
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/unmodified_drivers/linux-2.6/compat-include/linux/io.h	Wed Oct 25 12:12:01 2006 -0600
    37.3 @@ -0,0 +1,10 @@
    37.4 +#ifndef _LINUX_IO_H
    37.5 +#define _LINUX_IO_H
    37.6 +
    37.7 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
    37.8 +#error "This version of Linux should not need compat linux/io.h"
    37.9 +#endif
   37.10 +
   37.11 +#include <asm/io.h>
   37.12 +
   37.13 +#endif
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h	Wed Oct 25 12:12:01 2006 -0600
    38.3 @@ -0,0 +1,31 @@
    38.4 +/*
    38.5 + * Copyright (c) 2006 Cisco Systems.  All rights reserved.
    38.6 + *
    38.7 + * This file is released under the GPLv2.
    38.8 + */
    38.9 +
   38.10 +/* mutex compatibility for pre-2.6.16 kernels */
   38.11 +
   38.12 +#ifndef __LINUX_MUTEX_H
   38.13 +#define __LINUX_MUTEX_H
   38.14 +
   38.15 +#include <linux/version.h>
   38.16 +
   38.17 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
   38.18 +#error "This version of Linux should not need compat mutex.h"
   38.19 +#endif
   38.20 +
   38.21 +#include <linux/version.h>
   38.22 +#include <asm/semaphore.h>
   38.23 +
   38.24 +#define mutex semaphore
   38.25 +#define DEFINE_MUTEX(foo) DECLARE_MUTEX(foo)
   38.26 +#define mutex_init(foo) init_MUTEX(foo)
   38.27 +#define mutex_lock(foo) down(foo)
   38.28 +#define mutex_lock_interruptible(foo) down_interruptible(foo)
   38.29 +/* this function follows the spin_trylock() convention, so        *
   38.30 + * it is negated to the down_trylock() return values! Be careful  */
   38.31 +#define mutex_trylock(foo) !down_trylock(foo)
   38.32 +#define mutex_unlock(foo) up(foo)
   38.33 +
   38.34 +#endif /* __LINUX_MUTEX_H */
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Wed Oct 25 12:12:01 2006 -0600
    39.3 @@ -0,0 +1,52 @@
    39.4 +#ifndef COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
    39.5 +#define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
    39.6 +
    39.7 +#include <linux/version.h>
    39.8 +
    39.9 +#include <linux/spinlock.h>
   39.10 +
   39.11 +#if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
   39.12 +#define __always_inline inline
   39.13 +#endif
   39.14 +
   39.15 +#if defined(__LINUX_SPINLOCK_H) && !defined(DEFINE_SPINLOCK)
   39.16 +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
   39.17 +#endif
   39.18 +
   39.19 +#if defined(_LINUX_INIT_H) && !defined(__init)
   39.20 +#define __init
   39.21 +#endif
   39.22 +
   39.23 +#if defined(__LINUX_CACHE_H) && !defined(__read_mostly)
   39.24 +#define __read_mostly
   39.25 +#endif
   39.26 +
   39.27 +#if defined(_LINUX_SKBUFF_H) && !defined(NET_IP_ALIGN)
   39.28 +#define NET_IP_ALIGN 0
   39.29 +#endif
   39.30 +
   39.31 +#if defined(_LINUX_FS_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
   39.32 +#define nonseekable_open(inode, filp) /* Nothing to do */
   39.33 +#endif
   39.34 +
   39.35 +#if defined(_LINUX_MM_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
   39.36 +unsigned long vmalloc_to_pfn(void *addr);
   39.37 +#endif
   39.38 +
   39.39 +#if defined(__LINUX_COMPLETION_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
   39.40 +unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout);
   39.41 +#endif
   39.42 +
   39.43 +#if defined(_LINUX_SCHED_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
   39.44 +signed long schedule_timeout_interruptible(signed long timeout);
   39.45 +#endif
   39.46 +
   39.47 +#if defined(_LINUX_SLAB_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
   39.48 +void *kzalloc(size_t size, int flags);
   39.49 +#endif
   39.50 +
   39.51 +#if defined(_LINUX_BLKDEV_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
   39.52 +#define end_that_request_last(req, uptodate) end_that_request_last(req)
   39.53 +#endif
   39.54 +
   39.55 +#endif
    40.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.2 +++ b/unmodified_drivers/linux-2.6/netfront/Makefile	Wed Oct 25 12:12:01 2006 -0600
    40.3 @@ -0,0 +1,3 @@
    40.4 +ifneq ($(KERNELRELEASE),)
    40.5 +include $(src)/Kbuild
    40.6 +endif
    41.1 --- a/unmodified_drivers/linux-2.6/overrides.mk	Tue Oct 24 11:21:48 2006 -0600
    41.2 +++ b/unmodified_drivers/linux-2.6/overrides.mk	Wed Oct 25 12:12:01 2006 -0600
    41.3 @@ -9,4 +9,4 @@ EXTRA_CFLAGS += -DCONFIG_XEN_SHADOW_MODE
    41.4  EXTRA_CFLAGS += -DCONFIG_XEN_BLKDEV_GRANT -DXEN_EVTCHN_MASK_OPS
    41.5  EXTRA_CFLAGS += -DCONFIG_XEN_NETDEV_GRANT_RX -DCONFIG_XEN_NETDEV_GRANT_TX
    41.6  EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030202
    41.7 -EXTRA_CFLAGS += -I$(M)/include
    41.8 +EXTRA_CFLAGS += -I$(M)/include -I$(M)/compat-include -DHAVE_XEN_PLATFORM_COMPAT_H
    42.1 --- a/unmodified_drivers/linux-2.6/platform-pci/Kbuild	Tue Oct 24 11:21:48 2006 -0600
    42.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/Kbuild	Wed Oct 25 12:12:01 2006 -0600
    42.3 @@ -4,7 +4,7 @@ obj-m := xen-platform-pci.o
    42.4  
    42.5  EXTRA_CFLAGS += -I$(M)/platform-pci
    42.6  
    42.7 -xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o
    42.8 +xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o platform-compat.o
    42.9  
   42.10  # Can we do better ?
   42.11  ifeq ($(ARCH),ia64)
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/Makefile	Wed Oct 25 12:12:01 2006 -0600
    43.3 @@ -0,0 +1,3 @@
    43.4 +ifneq ($(KERNELRELEASE),)
    43.5 +include $(src)/Kbuild
    43.6 +endif
    44.1 --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Tue Oct 24 11:21:48 2006 -0600
    44.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Wed Oct 25 12:12:01 2006 -0600
    44.3 @@ -36,6 +36,10 @@
    44.4  #include <xen/features.h>
    44.5  #include "platform-pci.h"
    44.6  
    44.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
    44.8 +#include <xen/platform-compat.h>
    44.9 +#endif
   44.10 +
   44.11  void *shared_info_area;
   44.12  
   44.13  #define MAX_EVTCHN 256
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c	Wed Oct 25 12:12:01 2006 -0600
    45.3 @@ -0,0 +1,116 @@
    45.4 +#include <linux/config.h>
    45.5 +#include <linux/version.h>
    45.6 +
    45.7 +#include <linux/mm.h>
    45.8 +#include <linux/module.h>
    45.9 +#include <linux/sched.h>
   45.10 +#include <linux/slab.h>
   45.11 +
   45.12 +#include <xen/platform-compat.h>
   45.13 +
   45.14 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)
   45.15 +static int system_state = 1;
   45.16 +EXPORT_SYMBOL(system_state);
   45.17 +#endif
   45.18 +
   45.19 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
   45.20 +size_t strcspn(const char *s, const char *reject)
   45.21 +{
   45.22 +        const char *p;
   45.23 +        const char *r;
   45.24 +        size_t count = 0;
   45.25 +
   45.26 +        for (p = s; *p != '\0'; ++p) {
   45.27 +                for (r = reject; *r != '\0'; ++r) {
   45.28 +                        if (*p == *r)
   45.29 +                                return count;
   45.30 +                }
   45.31 +                ++count;
   45.32 +        }
   45.33 +
   45.34 +        return count;
   45.35 +}
   45.36 +EXPORT_SYMBOL(strcspn);
   45.37 +#endif
   45.38 +
   45.39 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
   45.40 +/*
   45.41 + * Map a vmalloc()-space virtual address to the physical page frame number.
   45.42 + */
   45.43 +unsigned long vmalloc_to_pfn(void * vmalloc_addr)
   45.44 +{
   45.45 +        return page_to_pfn(vmalloc_to_page(vmalloc_addr));
   45.46 +}
   45.47 +EXPORT_SYMBOL(vmalloc_to_pfn);
   45.48 +#endif
   45.49 +
   45.50 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
   45.51 +unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout)
   45.52 +{
   45.53 +        might_sleep();
   45.54 +
   45.55 +        spin_lock_irq(&x->wait.lock);
   45.56 +        if (!x->done) {
   45.57 +                DECLARE_WAITQUEUE(wait, current);
   45.58 +
   45.59 +                wait.flags |= WQ_FLAG_EXCLUSIVE;
   45.60 +                __add_wait_queue_tail(&x->wait, &wait);
   45.61 +                do {
   45.62 +                        __set_current_state(TASK_UNINTERRUPTIBLE);
   45.63 +                        spin_unlock_irq(&x->wait.lock);
   45.64 +                        timeout = schedule_timeout(timeout);
   45.65 +                        spin_lock_irq(&x->wait.lock);
   45.66 +                        if (!timeout) {
   45.67 +                                __remove_wait_queue(&x->wait, &wait);
   45.68 +                                goto out;
   45.69 +                        }
   45.70 +                } while (!x->done);
   45.71 +                __remove_wait_queue(&x->wait, &wait);
   45.72 +        }
   45.73 +        x->done--;
   45.74 +out:
   45.75 +        spin_unlock_irq(&x->wait.lock);
   45.76 +        return timeout;
   45.77 +}
   45.78 +EXPORT_SYMBOL(wait_for_completion_timeout);
   45.79 +#endif
   45.80 +
   45.81 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
   45.82 +/*
   45.83 +    fake do_exit using complete_and_exit
   45.84 + */
   45.85 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
   45.86 +asmlinkage NORET_TYPE void do_exit(long code)
   45.87 +#else
   45.88 +fastcall NORET_TYPE void do_exit(long code)
   45.89 +#endif
   45.90 +{
   45.91 +    complete_and_exit(NULL, code);
   45.92 +}
   45.93 +EXPORT_SYMBOL_GPL(do_exit);
   45.94 +#endif
   45.95 +
   45.96 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
   45.97 +signed long schedule_timeout_interruptible(signed long timeout)
   45.98 +{
   45.99 +	__set_current_state(TASK_INTERRUPTIBLE);
  45.100 +	return schedule_timeout(timeout);
  45.101 +}
  45.102 +EXPORT_SYMBOL(schedule_timeout_interruptible);
  45.103 +#endif
  45.104 +
  45.105 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
  45.106 +/**
  45.107 + * kzalloc - allocate memory. The memory is set to zero.
  45.108 + * @size: how many bytes of memory are required.
  45.109 + * @flags: the type of memory to allocate.
  45.110 + */
  45.111 +void *kzalloc(size_t size, int flags)
  45.112 +{
  45.113 +	void *ret = kmalloc(size, flags);
  45.114 +	if (ret)
  45.115 +		memset(ret, 0, size);
  45.116 +	return ret;
  45.117 +}
  45.118 +EXPORT_SYMBOL(kzalloc);
  45.119 +#endif
    46.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Tue Oct 24 11:21:48 2006 -0600
    46.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Wed Oct 25 12:12:01 2006 -0600
    46.3 @@ -33,6 +33,7 @@
    46.4  #include <asm/irq.h>
    46.5  #include <asm/uaccess.h>
    46.6  #include <asm/hypervisor.h>
    46.7 +#include <asm/pgtable.h>
    46.8  #include <xen/interface/memory.h>
    46.9  #include <xen/features.h>
   46.10  #ifdef __ia64__
   46.11 @@ -41,6 +42,10 @@
   46.12  
   46.13  #include "platform-pci.h"
   46.14  
   46.15 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
   46.16 +#include <xen/platform-compat.h>
   46.17 +#endif
   46.18 +
   46.19  #define DRV_NAME    "xen-platform-pci"
   46.20  #define DRV_VERSION "0.10"
   46.21  #define DRV_RELDATE "03/03/2005"
    47.1 --- a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c	Tue Oct 24 11:21:48 2006 -0600
    47.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c	Wed Oct 25 12:12:01 2006 -0600
    47.3 @@ -26,6 +26,10 @@
    47.4  #include <asm/hypervisor.h>
    47.5  #include "platform-pci.h"
    47.6  
    47.7 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
    47.8 +#include <xen/platform-compat.h>
    47.9 +#endif
   47.10 +
   47.11  void xen_machphys_update(unsigned long mfn, unsigned long pfn)
   47.12  {
   47.13  	BUG();
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/unmodified_drivers/linux-2.6/xenbus/Makefile	Wed Oct 25 12:12:01 2006 -0600
    48.3 @@ -0,0 +1,3 @@
    48.4 +ifneq ($(KERNELRELEASE),)
    48.5 +include $(src)/Kbuild
    48.6 +endif
    49.1 --- a/xen/arch/ia64/vmx/mmio.c	Tue Oct 24 11:21:48 2006 -0600
    49.2 +++ b/xen/arch/ia64/vmx/mmio.c	Wed Oct 25 12:12:01 2006 -0600
    49.3 @@ -52,6 +52,70 @@ struct mmio_list *lookup_mmio(u64 gpa, s
    49.4  #define PIB_OFST_INTA           0x1E0000
    49.5  #define PIB_OFST_XTP            0x1E0008
    49.6  
    49.7 +#define HVM_BUFFERED_IO_RANGE_NR 1
    49.8 +
    49.9 +struct hvm_buffered_io_range {
   49.10 +    unsigned long start_addr;
   49.11 +    unsigned long length;
   49.12 +};
   49.13 +
   49.14 +static struct hvm_buffered_io_range buffered_stdvga_range = {0xA0000, 0x20000};
   49.15 +static struct hvm_buffered_io_range
   49.16 +*hvm_buffered_io_ranges[HVM_BUFFERED_IO_RANGE_NR] =
   49.17 +{
   49.18 +    &buffered_stdvga_range
   49.19 +};
   49.20 +
   49.21 +int hvm_buffered_io_intercept(ioreq_t *p)
   49.22 +{
   49.23 +    struct vcpu *v = current;
   49.24 +    spinlock_t  *buffered_io_lock;
   49.25 +    buffered_iopage_t *buffered_iopage =
   49.26 +        (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
   49.27 +    unsigned long tmp_write_pointer = 0;
   49.28 +    int i;
   49.29 +
   49.30 +    /* ignore READ ioreq_t! */
   49.31 +    if ( p->dir == IOREQ_READ )
   49.32 +        return 0;
   49.33 +
   49.34 +    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
   49.35 +        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
   49.36 +             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
   49.37 +                                     hvm_buffered_io_ranges[i]->length )
   49.38 +            break;
   49.39 +    }
   49.40 +
   49.41 +    if ( i == HVM_BUFFERED_IO_RANGE_NR )
   49.42 +        return 0;
   49.43 +
   49.44 +    buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
   49.45 +    spin_lock(buffered_io_lock);
   49.46 +
   49.47 +    if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer ==
   49.48 +         (unsigned long)IOREQ_BUFFER_SLOT_NUM ) {
   49.49 +        /* the queue is full.
   49.50 +         * send the iopacket through the normal path.
    49.51 +         * NOTE: The arithmetic operation could handle the situation for
   49.52 +         * write_pointer overflow.
   49.53 +         */
   49.54 +        spin_unlock(buffered_io_lock);
   49.55 +        return 0;
   49.56 +    }
   49.57 +
   49.58 +    tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM;
   49.59 +
   49.60 +    memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t));
   49.61 +
   49.62 +    /*make the ioreq_t visible before write_pointer*/
   49.63 +    wmb();
   49.64 +    buffered_iopage->write_pointer++;
   49.65 +
   49.66 +    spin_unlock(buffered_io_lock);
   49.67 +
   49.68 +    return 1;
   49.69 +}
   49.70 +
   49.71  static void write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value);
   49.72  
   49.73  static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
   49.74 @@ -156,7 +220,11 @@ static void low_mmio_access(VCPU *vcpu, 
   49.75      p->df = 0;
   49.76  
   49.77      p->io_count++;
   49.78 -
   49.79 +    if(hvm_buffered_io_intercept(p)){
   49.80 +        p->state = STATE_IORESP_READY;
   49.81 +        vmx_io_assist(v);
   49.82 +        return ;
   49.83 +    }else 
   49.84      vmx_send_assist_req(v);
   49.85      if(dir==IOREQ_READ){ //read
   49.86          *val=p->u.data;
    50.1 --- a/xen/arch/ia64/vmx/vmx_init.c	Tue Oct 24 11:21:48 2006 -0600
    50.2 +++ b/xen/arch/ia64/vmx/vmx_init.c	Wed Oct 25 12:12:01 2006 -0600
    50.3 @@ -362,8 +362,8 @@ static const io_range_t io_ranges[] = {
    50.4  	{PIB_START, PIB_SIZE, GPFN_PIB},
    50.5  };
    50.6  
    50.7 -/* Reseve 1 page for shared I/O and 1 page for xenstore.  */
    50.8 -#define VMX_SYS_PAGES	(2 + (GFW_SIZE >> PAGE_SHIFT))
    50.9 +/* Reserve 1 page for shared I/O, 1 page for xenstore and 1 page for buffered I/O.  */
   50.10 +#define VMX_SYS_PAGES	(3 + (GFW_SIZE >> PAGE_SHIFT))
   50.11  #define VMX_CONFIG_PAGES(d) ((d)->max_pages - VMX_SYS_PAGES)
   50.12  
   50.13  static void vmx_build_physmap_table(struct domain *d)
   50.14 @@ -425,8 +425,12 @@ static void vmx_build_physmap_table(stru
   50.15  	mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
   50.16  	assign_domain_page(d, STORE_PAGE_START, mfn << PAGE_SHIFT);
   50.17  	list_ent = mfn_to_page(mfn)->list.next;
   50.18 +	ASSERT(list_ent != &d->page_list);
   50.19 +    
   50.20 +    mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
   50.21 +    assign_domain_page(d, BUFFER_IO_PAGE_START, mfn << PAGE_SHIFT);
   50.22 +    list_ent = mfn_to_page(mfn)->list.next;
   50.23  	ASSERT(list_ent == &d->page_list);
   50.24 -
   50.25  }
   50.26  
   50.27  void vmx_setup_platform(struct domain *d)
   50.28 @@ -437,6 +441,10 @@ void vmx_setup_platform(struct domain *d
   50.29  
   50.30  	d->arch.vmx_platform.shared_page_va =
   50.31  		(unsigned long)__va(__gpa_to_mpa(d, IO_PAGE_START));
   50.32 +    //For buffered IO requests.
   50.33 +    spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
   50.34 +    d->arch.hvm_domain.buffered_io_va =
   50.35 +        (unsigned long)__va(__gpa_to_mpa(d, BUFFER_IO_PAGE_START));
   50.36  	/* TEMP */
   50.37  	d->arch.vmx_platform.pib_base = 0xfee00000UL;
   50.38  
    51.1 --- a/xen/arch/x86/Makefile	Tue Oct 24 11:21:48 2006 -0600
    51.2 +++ b/xen/arch/x86/Makefile	Wed Oct 25 12:12:01 2006 -0600
    51.3 @@ -28,12 +28,14 @@ obj-y += microcode.o
    51.4  obj-y += mm.o
    51.5  obj-y += mpparse.o
    51.6  obj-y += nmi.o
    51.7 +obj-y += numa.o
    51.8  obj-y += physdev.o
    51.9  obj-y += rwlock.o
   51.10  obj-y += setup.o
   51.11  obj-y += shutdown.o
   51.12  obj-y += smp.o
   51.13  obj-y += smpboot.o
   51.14 +obj-y += srat.o
   51.15  obj-y += string.o
   51.16  obj-y += sysctl.o
   51.17  obj-y += time.o
    52.1 --- a/xen/arch/x86/hvm/Makefile	Tue Oct 24 11:21:48 2006 -0600
    52.2 +++ b/xen/arch/x86/hvm/Makefile	Wed Oct 25 12:12:01 2006 -0600
    52.3 @@ -5,6 +5,7 @@ obj-y += hvm.o
    52.4  obj-y += i8254.o
    52.5  obj-y += i8259.o
    52.6  obj-y += rtc.o
    52.7 +obj-y += pmtimer.o
    52.8  obj-y += instrlen.o
    52.9  obj-y += intercept.o
   52.10  obj-y += io.o
    53.1 --- a/xen/arch/x86/hvm/hvm.c	Tue Oct 24 11:21:48 2006 -0600
    53.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Oct 25 12:12:01 2006 -0600
    53.3 @@ -43,7 +43,7 @@
    53.4  #include <asm/mc146818rtc.h>
    53.5  #include <asm/spinlock.h>
    53.6  #include <asm/hvm/hvm.h>
    53.7 -#include <asm/hvm/vpit.h>
    53.8 +#include <asm/hvm/vpt.h>
    53.9  #include <asm/hvm/support.h>
   53.10  #include <public/sched.h>
   53.11  #include <public/hvm/ioreq.h>
   53.12 @@ -285,6 +285,7 @@ void hvm_setup_platform(struct domain* d
   53.13                 pt_timer_fn, v, v->processor);
   53.14      pit_init(v, cpu_khz);
   53.15      rtc_init(v, RTC_PORT(0), RTC_IRQ);
   53.16 +    pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS); 
   53.17  }
   53.18  
   53.19  void pic_irq_request(void *data, int level)
    54.1 --- a/xen/arch/x86/hvm/i8254.c	Tue Oct 24 11:21:48 2006 -0600
    54.2 +++ b/xen/arch/x86/hvm/i8254.c	Wed Oct 25 12:12:01 2006 -0600
    54.3 @@ -38,7 +38,7 @@
    54.4  #include <asm/hvm/hvm.h>
    54.5  #include <asm/hvm/io.h>
    54.6  #include <asm/hvm/support.h>
    54.7 -#include <asm/hvm/vpit.h>
    54.8 +#include <asm/hvm/vpt.h>
    54.9  #include <asm/current.h>
   54.10  
   54.11  /* Enable DEBUG_PIT may cause guest calibration inaccuracy */
    55.1 --- a/xen/arch/x86/hvm/io.c	Tue Oct 24 11:21:48 2006 -0600
    55.2 +++ b/xen/arch/x86/hvm/io.c	Wed Oct 25 12:12:01 2006 -0600
    55.3 @@ -35,7 +35,7 @@
    55.4  #include <asm/shadow.h>
    55.5  #include <asm/hvm/hvm.h>
    55.6  #include <asm/hvm/support.h>
    55.7 -#include <asm/hvm/vpit.h>
    55.8 +#include <asm/hvm/vpt.h>
    55.9  #include <asm/hvm/vpic.h>
   55.10  #include <asm/hvm/vlapic.h>
   55.11  
    56.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.2 +++ b/xen/arch/x86/hvm/pmtimer.c	Wed Oct 25 12:12:01 2006 -0600
    56.3 @@ -0,0 +1,63 @@
    56.4 +#include <asm/hvm/vpt.h>
    56.5 +#include <asm/hvm/io.h>
    56.6 +#include <asm/hvm/support.h>
    56.7 +
    56.8 +#define TMR_STS (1 << 0)
    56.9 +static void pmt_update_status(void *opaque)
   56.10 +{
   56.11 +   PMTState *s = opaque;
   56.12 +   s->pm1_status |= TMR_STS;
   56.13 +
   56.14 +   /* TODO: When TMR_EN == 1, generate a SCI event */
   56.15 +
   56.16 +   set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
   56.17 +}
   56.18 +
   56.19 +static int handle_pmt_io(ioreq_t *p)
   56.20 +{
   56.21 +    struct vcpu *v = current;
   56.22 +    PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
   56.23 +    uint64_t curr_gtime;
   56.24 +
   56.25 +    if (p->size != 4 ||
   56.26 +        p->pdata_valid ||
   56.27 +        p->type != IOREQ_TYPE_PIO){
   56.28 +        printk("HVM_PMT: wrong PM timer IO\n");
   56.29 +        return 1;
   56.30 +    }
   56.31 +    
   56.32 +    if (p->dir == 0) { /* write */
   56.33 +        /* PM_TMR_BLK is read-only */
   56.34 +        return 1;
   56.35 +    } else if (p->dir == 1) { /* read */
   56.36 +        curr_gtime = hvm_get_guest_time(s->vcpu);
   56.37 +        s->pm1_timer += ((curr_gtime - s->last_gtime) * s->scale) >> 32;
   56.38 +        p->u.data = s->pm1_timer;
   56.39 +        s->last_gtime = curr_gtime;
   56.40 +        return 1;
   56.41 +    }
   56.42 +    return 0;
   56.43 +}
   56.44 +
   56.45 +void pmtimer_init(struct vcpu *v, int base)
   56.46 +{
   56.47 +    PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
   56.48 +
   56.49 +    s->pm1_timer = 0;
   56.50 +    s->pm1_status = 0;
   56.51 +    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
   56.52 +    s->vcpu = v;
   56.53 +
   56.54 +    init_timer(&s->timer, pmt_update_status, s, v->processor);
   56.55 +    /* ACPI supports a 32-bit power management timer */
   56.56 +    set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
   56.57 +    
   56.58 +    register_portio_handler(base, 4, handle_pmt_io);
   56.59 +}
   56.60 +
   56.61 +void pmtimer_deinit(struct domain *d)
   56.62 +{
   56.63 +    PMTState *s = &d->arch.hvm_domain.pl_time.vpmt;
   56.64 +
   56.65 +    kill_timer(&s->timer);
   56.66 +}
    57.1 --- a/xen/arch/x86/hvm/rtc.c	Tue Oct 24 11:21:48 2006 -0600
    57.2 +++ b/xen/arch/x86/hvm/rtc.c	Wed Oct 25 12:12:01 2006 -0600
    57.3 @@ -23,7 +23,7 @@
    57.4   */
    57.5  
    57.6  #include <asm/mc146818rtc.h>
    57.7 -#include <asm/hvm/vpit.h>
    57.8 +#include <asm/hvm/vpt.h>
    57.9  #include <asm/hvm/io.h>
   57.10  #include <asm/hvm/support.h>
   57.11  #include <asm/current.h>
    58.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Oct 24 11:21:48 2006 -0600
    58.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Wed Oct 25 12:12:01 2006 -0600
    58.3 @@ -922,6 +922,7 @@ static void svm_relinquish_guest_resourc
    58.4  
    58.5      kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
    58.6      rtc_deinit(d);
    58.7 +    pmtimer_deinit(d);
    58.8  
    58.9      if ( d->arch.hvm_domain.shared_page_va )
   58.10          unmap_domain_page_global(
   58.11 @@ -937,6 +938,7 @@ static void svm_migrate_timers(struct vc
   58.12      struct periodic_time *pt = 
   58.13          &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   58.14      struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
   58.15 +    struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
   58.16  
   58.17      if ( pt->enabled )
   58.18      {
   58.19 @@ -947,6 +949,7 @@ static void svm_migrate_timers(struct vc
   58.20          migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
   58.21      migrate_timer(&vrtc->second_timer, v->processor);
   58.22      migrate_timer(&vrtc->second_timer2, v->processor);
   58.23 +    migrate_timer(&vpmt->timer, v->processor);
   58.24  }
   58.25  
   58.26  
    59.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Oct 24 11:21:48 2006 -0600
    59.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Oct 25 12:12:01 2006 -0600
    59.3 @@ -147,6 +147,7 @@ static void vmx_relinquish_guest_resourc
    59.4  
    59.5      kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
    59.6      rtc_deinit(d);
    59.7 +    pmtimer_deinit(d);
    59.8  
    59.9      if ( d->arch.hvm_domain.shared_page_va )
   59.10          unmap_domain_page_global(
   59.11 @@ -489,6 +490,7 @@ void vmx_migrate_timers(struct vcpu *v)
   59.12  {
   59.13      struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   59.14      struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
   59.15 +    struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
   59.16  
   59.17      if ( pt->enabled )
   59.18      {
   59.19 @@ -499,6 +501,7 @@ void vmx_migrate_timers(struct vcpu *v)
   59.20          migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
   59.21      migrate_timer(&vrtc->second_timer, v->processor);
   59.22      migrate_timer(&vrtc->second_timer2, v->processor);
   59.23 +    migrate_timer(&vpmt->timer, v->processor);
   59.24  }
   59.25  
   59.26  static void vmx_store_cpu_guest_regs(
    60.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    60.2 +++ b/xen/arch/x86/numa.c	Wed Oct 25 12:12:01 2006 -0600
    60.3 @@ -0,0 +1,308 @@
    60.4 +/* 
    60.5 + * Generic VM initialization for x86-64 NUMA setups.
    60.6 + * Copyright 2002,2003 Andi Kleen, SuSE Labs.
    60.7 + * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
    60.8 + */ 
    60.9 +
   60.10 +#include <xen/mm.h>
   60.11 +#include <xen/string.h>
   60.12 +#include <xen/init.h>
   60.13 +#include <xen/ctype.h>
   60.14 +#include <xen/nodemask.h>
   60.15 +#include <xen/numa.h>
   60.16 +#include <xen/keyhandler.h>
   60.17 +#include <xen/time.h>
   60.18 +#include <xen/smp.h>
   60.19 +#include <asm/acpi.h>
   60.20 +
   60.21 +static int numa_setup(char *s);
   60.22 +custom_param("numa", numa_setup);
   60.23 +
   60.24 +#ifndef Dprintk
   60.25 +#define Dprintk(x...)
   60.26 +#endif
   60.27 +
   60.28 +/* from proto.h */
   60.29 +#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
   60.30 +
   60.31 +struct node_data node_data[MAX_NUMNODES];
   60.32 +
   60.33 +int memnode_shift;
   60.34 +u8  memnodemap[NODEMAPSIZE];
   60.35 +
   60.36 +unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
   60.37 +	[0 ... NR_CPUS-1] = NUMA_NO_NODE
   60.38 +};
   60.39 +unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
   60.40 + 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
   60.41 +};
   60.42 +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
   60.43 +
   60.44 +nodemask_t node_online_map = { { [0] = 1UL } };
   60.45 +
   60.46 +/* Default NUMA to off for now. acpi=on required to enable it. */
   60.47 +int numa_off __initdata = 1;
   60.48 +
   60.49 +int acpi_numa __initdata;
   60.50 +
   60.51 +/*
   60.52 + * Given a shift value, try to populate memnodemap[]
   60.53 + * Returns :
   60.54 + * 1 if OK
   60.55 + * 0 if memnodmap[] too small (of shift too small)
   60.56 + * -1 if node overlap or lost ram (shift too big)
   60.57 + */
   60.58 +static int __init
   60.59 +populate_memnodemap(const struct node *nodes, int numnodes, int shift)
   60.60 +{
   60.61 +	int i; 
   60.62 +	int res = -1;
   60.63 +	unsigned long addr, end;
   60.64 +
   60.65 +	if (shift >= 64)
   60.66 +		return -1;
   60.67 +	memset(memnodemap, 0xff, sizeof(memnodemap));
   60.68 +	for (i = 0; i < numnodes; i++) {
   60.69 +		addr = nodes[i].start;
   60.70 +		end = nodes[i].end;
   60.71 +		if (addr >= end)
   60.72 +			continue;
   60.73 +		if ((end >> shift) >= NODEMAPSIZE)
   60.74 +			return 0;
   60.75 +		do {
   60.76 +			if (memnodemap[addr >> shift] != 0xff)
   60.77 +				return -1;
   60.78 +			memnodemap[addr >> shift] = i;
   60.79 +			addr += (1UL << shift);
   60.80 +		} while (addr < end);
   60.81 +		res = 1;
   60.82 +	} 
   60.83 +	return res;
   60.84 +}
   60.85 +
   60.86 +int __init compute_hash_shift(struct node *nodes, int numnodes)
   60.87 +{
   60.88 +	int shift = 20;
   60.89 +
   60.90 +	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
   60.91 +		shift++;
   60.92 +
   60.93 +	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
   60.94 +		shift);
   60.95 +
   60.96 +	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
   60.97 +		printk(KERN_INFO
   60.98 +	"Your memory is not aligned you need to rebuild your kernel "
   60.99 +	"with a bigger NODEMAPSIZE shift=%d\n",
  60.100 +			shift);
  60.101 +		return -1;
  60.102 +	}
  60.103 +	return shift;
  60.104 +}
  60.105 +
  60.106 +/* initialize NODE_DATA given nodeid and start/end */
  60.107 +void __init setup_node_bootmem(int nodeid, u64 start, u64 end)
  60.108 +{ 
  60.109 +	unsigned long start_pfn, end_pfn;
  60.110 +
  60.111 +	start_pfn = start >> PAGE_SHIFT;
  60.112 +	end_pfn = end >> PAGE_SHIFT;
  60.113 +
  60.114 +	NODE_DATA(nodeid)->node_id = nodeid;
  60.115 +	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
  60.116 +	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
  60.117 +
  60.118 +	node_set_online(nodeid);
  60.119 +} 
  60.120 +
  60.121 +void __init numa_init_array(void)
  60.122 +{
  60.123 +	int rr, i;
  60.124 +	/* There are unfortunately some poorly designed mainboards around
  60.125 +	   that only connect memory to a single CPU. This breaks the 1:1 cpu->node
  60.126 +	   mapping. To avoid this fill in the mapping for all possible
  60.127 +	   CPUs, as the number of CPUs is not known yet. 
  60.128 +	   We round robin the existing nodes. */
  60.129 +	rr = first_node(node_online_map);
  60.130 +	for (i = 0; i < NR_CPUS; i++) {
  60.131 +		if (cpu_to_node[i] != NUMA_NO_NODE)
  60.132 +			continue;
  60.133 + 		numa_set_node(i, rr);
  60.134 +		rr = next_node(rr, node_online_map);
  60.135 +		if (rr == MAX_NUMNODES)
  60.136 +			rr = first_node(node_online_map);
  60.137 +	}
  60.138 +
  60.139 +}
  60.140 +
  60.141 +#ifdef CONFIG_NUMA_EMU
  60.142 +static int numa_fake __initdata = 0;
  60.143 +
  60.144 +/* Numa emulation */
  60.145 +static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
  60.146 +{
  60.147 + 	int i;
  60.148 + 	struct node nodes[MAX_NUMNODES];
  60.149 + 	unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
  60.150 +
  60.151 + 	/* Kludge needed for the hash function */
  60.152 + 	if (hweight64(sz) > 1) {
  60.153 + 		unsigned long x = 1;
  60.154 + 		while ((x << 1) < sz)
  60.155 + 			x <<= 1;
  60.156 + 		if (x < sz/2)
  60.157 + 			printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
  60.158 + 		sz = x;
  60.159 + 	}
  60.160 +
  60.161 + 	memset(&nodes,0,sizeof(nodes));
  60.162 + 	for (i = 0; i < numa_fake; i++) {
  60.163 + 		nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
  60.164 + 		if (i == numa_fake-1)
  60.165 + 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
  60.166 + 		nodes[i].end = nodes[i].start + sz;
  60.167 + 		printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
  60.168 +		       i,
  60.169 +		       nodes[i].start, nodes[i].end,
  60.170 +		       (nodes[i].end - nodes[i].start) >> 20);
  60.171 +		node_set_online(i);
  60.172 + 	}
  60.173 + 	memnode_shift = compute_hash_shift(nodes, numa_fake);
  60.174 + 	if (memnode_shift < 0) {
  60.175 + 		memnode_shift = 0;
  60.176 + 		printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
  60.177 + 		return -1;
  60.178 + 	}
  60.179 + 	for_each_online_node(i)
  60.180 + 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
  60.181 + 	numa_init_array();
  60.182 + 	return 0;
  60.183 +}
  60.184 +#endif
  60.185 +
  60.186 +void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
  60.187 +{ 
  60.188 +	int i;
  60.189 +
  60.190 +#ifdef CONFIG_NUMA_EMU
  60.191 +	if (numa_fake && !numa_emulation(start_pfn, end_pfn))
  60.192 +		return;
  60.193 +#endif
  60.194 +
  60.195 +#ifdef CONFIG_ACPI_NUMA
  60.196 +	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
  60.197 +					  end_pfn << PAGE_SHIFT))
  60.198 +		return;
  60.199 +#endif
  60.200 +
  60.201 +	printk(KERN_INFO "%s\n",
  60.202 +	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
  60.203 +
  60.204 +	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
  60.205 +	       start_pfn << PAGE_SHIFT,
  60.206 +	       end_pfn << PAGE_SHIFT); 
  60.207 +	/* setup dummy node covering all memory */ 
  60.208 +	memnode_shift = 63; 
  60.209 +	memnodemap[0] = 0;
  60.210 +	nodes_clear(node_online_map);
  60.211 +	node_set_online(0);
  60.212 +	for (i = 0; i < NR_CPUS; i++)
  60.213 +		numa_set_node(i, 0);
  60.214 +	node_to_cpumask[0] = cpumask_of_cpu(0);
  60.215 +	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
  60.216 +}
  60.217 +
  60.218 +__cpuinit void numa_add_cpu(int cpu)
  60.219 +{
  60.220 +	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
  60.221 +} 
  60.222 +
  60.223 +void __cpuinit numa_set_node(int cpu, int node)
  60.224 +{
  60.225 +	cpu_to_node[cpu] = node;
  60.226 +}
  60.227 +
  60.228 +/* [numa=off] */
  60.229 +static __init int numa_setup(char *opt) 
  60.230 +{ 
  60.231 +	if (!strncmp(opt,"off",3))
  60.232 +		numa_off = 1;
  60.233 +	if (!strncmp(opt,"on",2))
  60.234 +		numa_off = 0;
  60.235 +#ifdef CONFIG_NUMA_EMU
  60.236 +	if(!strncmp(opt, "fake=", 5)) {
  60.237 +		numa_off = 0;
  60.238 +		numa_fake = simple_strtoul(opt+5,NULL,0); ;
  60.239 +		if (numa_fake >= MAX_NUMNODES)
  60.240 +			numa_fake = MAX_NUMNODES;
  60.241 +	}
  60.242 +#endif
  60.243 +#ifdef CONFIG_ACPI_NUMA
  60.244 +	if (!strncmp(opt,"noacpi",6)) {
  60.245 +		numa_off = 0;
  60.246 +		acpi_numa = -1;
  60.247 +	}
  60.248 +#endif
  60.249 +	return 1;
  60.250 +} 
  60.251 +
  60.252 +/*
  60.253 + * Setup early cpu_to_node.
  60.254 + *
  60.255 + * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
  60.256 + * and apicid_to_node[] tables have valid entries for a CPU.
  60.257 + * This means we skip cpu_to_node[] initialisation for NUMA
  60.258 + * emulation and faking node case (when running a kernel compiled
  60.259 + * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
  60.260 + * is already initialized in a round robin manner at numa_init_array,
  60.261 + * prior to this call, and this initialization is good enough
  60.262 + * for the fake NUMA cases.
  60.263 + */
  60.264 +void __init init_cpu_to_node(void)
  60.265 +{
  60.266 +	int i;
  60.267 + 	for (i = 0; i < NR_CPUS; i++) {
  60.268 +		u8 apicid = x86_cpu_to_apicid[i];
  60.269 +		if (apicid == BAD_APICID)
  60.270 +			continue;
  60.271 +		if (apicid_to_node[apicid] == NUMA_NO_NODE)
  60.272 +			continue;
  60.273 +		numa_set_node(i,apicid_to_node[apicid]);
  60.274 +	}
  60.275 +}
  60.276 +
  60.277 +EXPORT_SYMBOL(cpu_to_node);
  60.278 +EXPORT_SYMBOL(node_to_cpumask);
  60.279 +EXPORT_SYMBOL(memnode_shift);
  60.280 +EXPORT_SYMBOL(memnodemap);
  60.281 +EXPORT_SYMBOL(node_data);
  60.282 +
  60.283 +static void dump_numa(unsigned char key)
  60.284 +{
  60.285 +	s_time_t now = NOW();
  60.286 +	int i;
  60.287 +
  60.288 +	printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
  60.289 +		  (u32)(now>>32), (u32)now);
  60.290 +
  60.291 +	for_each_online_node(i) {
  60.292 +		unsigned long pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
  60.293 +		printk("idx%d -> NODE%d start->%lu size->%lu\n",
  60.294 +			  i, NODE_DATA(i)->node_id,
  60.295 +			  NODE_DATA(i)->node_start_pfn,
  60.296 +			  NODE_DATA(i)->node_spanned_pages);
  60.297 +		/* sanity check phys_to_nid() */
  60.298 +		printk("phys_to_nid(%lx) -> %d should be %d\n", pa, phys_to_nid(pa),
  60.299 +			  NODE_DATA(i)->node_id);
  60.300 +	}
  60.301 +	for_each_online_cpu(i)
  60.302 +		printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
  60.303 +}
  60.304 +
  60.305 +static __init int register_numa_trigger(void)
  60.306 +{
  60.307 +	register_keyhandler('u', dump_numa, "dump numa info");
  60.308 +	return 0;
  60.309 +}
  60.310 +__initcall(register_numa_trigger);
  60.311 +
    61.1 --- a/xen/arch/x86/setup.c	Tue Oct 24 11:21:48 2006 -0600
    61.2 +++ b/xen/arch/x86/setup.c	Wed Oct 25 12:12:01 2006 -0600
    61.3 @@ -16,6 +16,7 @@
    61.4  #include <xen/percpu.h>
    61.5  #include <xen/hypercall.h>
    61.6  #include <xen/keyhandler.h>
    61.7 +#include <xen/numa.h>
    61.8  #include <public/version.h>
    61.9  #include <asm/bitops.h>
   61.10  #include <asm/smp.h>
   61.11 @@ -29,6 +30,7 @@
   61.12  
   61.13  extern void dmi_scan_machine(void);
   61.14  extern void generic_apic_probe(void);
   61.15 +extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
   61.16  
   61.17  /*
   61.18   * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
   61.19 @@ -257,6 +259,20 @@ static void __init init_idle_domain(void
   61.20      setup_idle_pagetable();
   61.21  }
   61.22  
   61.23 +static void srat_detect_node(int cpu)
   61.24 +{
   61.25 +    unsigned node;
   61.26 +    u8 apicid = x86_cpu_to_apicid[cpu];
   61.27 +
   61.28 +    node = apicid_to_node[apicid];
   61.29 +    if ( node == NUMA_NO_NODE )
   61.30 +        node = 0;
   61.31 +    numa_set_node(cpu, node);
   61.32 +
   61.33 +    if ( acpi_numa > 0 )
   61.34 +        printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
   61.35 +}
   61.36 +
   61.37  void __init __start_xen(multiboot_info_t *mbi)
   61.38  {
   61.39      char __cmdline[] = "", *cmdline = __cmdline;
   61.40 @@ -485,6 +501,12 @@ void __init __start_xen(multiboot_info_t
   61.41  
   61.42      init_frametable();
   61.43  
   61.44 +    acpi_boot_table_init();
   61.45 +
   61.46 +    acpi_numa_init();
   61.47 +
   61.48 +    numa_initmem_init(0, max_page);
   61.49 +
   61.50      end_boot_allocator();
   61.51  
   61.52      /* Initialise the Xen heap, skipping RAM holes. */
   61.53 @@ -536,9 +558,10 @@ void __init __start_xen(multiboot_info_t
   61.54  
   61.55      generic_apic_probe();
   61.56  
   61.57 -    acpi_boot_table_init();
   61.58      acpi_boot_init();
   61.59  
   61.60 +    init_cpu_to_node();
   61.61 +
   61.62      if ( smp_found_config )
   61.63          get_smp_config();
   61.64  
   61.65 @@ -589,6 +612,11 @@ void __init __start_xen(multiboot_info_t
   61.66              break;
   61.67          if ( !cpu_online(i) )
   61.68              __cpu_up(i);
   61.69 +
   61.70 +        /* Set up cpu_to_node[]. */
   61.71 +        srat_detect_node(i);
   61.72 +        /* Set up node_to_cpumask based on cpu_to_node[]. */
   61.73 +        numa_add_cpu(i);        
   61.74      }
   61.75  
   61.76      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
    62.1 --- a/xen/arch/x86/smpboot.c	Tue Oct 24 11:21:48 2006 -0600
    62.2 +++ b/xen/arch/x86/smpboot.c	Wed Oct 25 12:12:01 2006 -0600
    62.3 @@ -43,6 +43,7 @@
    62.4  #include <xen/delay.h>
    62.5  #include <xen/softirq.h>
    62.6  #include <xen/serial.h>
    62.7 +#include <xen/numa.h>
    62.8  #include <asm/current.h>
    62.9  #include <asm/mc146818rtc.h>
   62.10  #include <asm/desc.h>
   62.11 @@ -628,7 +629,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] __read_
   62.12  static void map_cpu_to_logical_apicid(void)
   62.13  {
   62.14  	int cpu = smp_processor_id();
   62.15 -	int apicid = logical_smp_processor_id();
   62.16 +	int apicid = hard_smp_processor_id();
   62.17  
   62.18  	cpu_2_logical_apicid[cpu] = apicid;
   62.19  	map_cpu_to_node(cpu, apicid_to_node(apicid));
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/xen/arch/x86/srat.c	Wed Oct 25 12:12:01 2006 -0600
    63.3 @@ -0,0 +1,315 @@
    63.4 +/*
    63.5 + * ACPI 3.0 based NUMA setup
    63.6 + * Copyright 2004 Andi Kleen, SuSE Labs.
    63.7 + *
    63.8 + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
    63.9 + *
   63.10 + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
   63.11 + * Assumes all memory regions belonging to a single proximity domain
   63.12 + * are in one chunk. Holes between them will be included in the node.
   63.13 + * 
   63.14 + * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
   63.15 + */
   63.16 +
   63.17 +#include <xen/init.h>
   63.18 +#include <xen/mm.h>
   63.19 +#include <xen/inttypes.h>
   63.20 +#include <xen/nodemask.h>
   63.21 +#include <xen/acpi.h>
   63.22 +#include <xen/numa.h>
   63.23 +#include <asm/page.h>
   63.24 +
   63.25 +static struct acpi_table_slit *acpi_slit;
   63.26 +
   63.27 +static nodemask_t nodes_parsed __initdata;
   63.28 +static nodemask_t nodes_found __initdata;
   63.29 +static struct node nodes[MAX_NUMNODES] __initdata;
   63.30 +static u8 pxm2node[256] = { [0 ... 255] = 0xff };
   63.31 +
   63.32 +/* Too small nodes confuse the VM badly. Usually they result
   63.33 +   from BIOS bugs. */
   63.34 +#define NODE_MIN_SIZE (4*1024*1024)
   63.35 +
   63.36 +static int node_to_pxm(int n);
   63.37 +
   63.38 +int pxm_to_node(int pxm)
   63.39 +{
   63.40 +	if ((unsigned)pxm >= 256)
   63.41 +		return -1;
   63.42 +	/* Extend 0xff to (int)-1 */
   63.43 +	return (signed char)pxm2node[pxm];
   63.44 +}
   63.45 +
   63.46 +static __init int setup_node(int pxm)
   63.47 +{
   63.48 +	unsigned node = pxm2node[pxm];
   63.49 +	if (node == 0xff) {
   63.50 +		if (nodes_weight(nodes_found) >= MAX_NUMNODES)
   63.51 +			return -1;
   63.52 +		node = first_unset_node(nodes_found); 
   63.53 +		node_set(node, nodes_found);
   63.54 +		pxm2node[pxm] = node;
   63.55 +	}
   63.56 +	return pxm2node[pxm];
   63.57 +}
   63.58 +
   63.59 +static __init int conflicting_nodes(u64 start, u64 end)
   63.60 +{
   63.61 +	int i;
   63.62 +	for_each_node_mask(i, nodes_parsed) {
   63.63 +		struct node *nd = &nodes[i];
   63.64 +		if (nd->start == nd->end)
   63.65 +			continue;
   63.66 +		if (nd->end > start && nd->start < end)
   63.67 +			return i;
   63.68 +		if (nd->end == end && nd->start == start)
   63.69 +			return i;
   63.70 +	}
   63.71 +	return -1;
   63.72 +}
   63.73 +
   63.74 +static __init void cutoff_node(int i, u64 start, u64 end)
   63.75 +{
   63.76 +	struct node *nd = &nodes[i];
   63.77 +	if (nd->start < start) {
   63.78 +		nd->start = start;
   63.79 +		if (nd->end < nd->start)
   63.80 +			nd->start = nd->end;
   63.81 +	}
   63.82 +	if (nd->end > end) {
   63.83 +		nd->end = end;
   63.84 +		if (nd->start > nd->end)
   63.85 +			nd->start = nd->end;
   63.86 +	}
   63.87 +}
   63.88 +
   63.89 +static __init void bad_srat(void)
   63.90 +{
   63.91 +	int i;
   63.92 +	printk(KERN_ERR "SRAT: SRAT not used.\n");
   63.93 +	acpi_numa = -1;
   63.94 +	for (i = 0; i < MAX_LOCAL_APIC; i++)
   63.95 +		apicid_to_node[i] = NUMA_NO_NODE;
   63.96 +}
   63.97 +
   63.98 +static __init inline int srat_disabled(void)
   63.99 +{
  63.100 +	return numa_off || acpi_numa < 0;
  63.101 +}
  63.102 +
  63.103 +/*
   63.104 + * A lot of BIOSes fill in 10 (= no distance) everywhere. This messes
  63.105 + * up the NUMA heuristics which wants the local node to have a smaller
  63.106 + * distance than the others.
  63.107 + * Do some quick checks here and only use the SLIT if it passes.
  63.108 + */
  63.109 +static __init int slit_valid(struct acpi_table_slit *slit)
  63.110 +{
  63.111 +	int i, j;
  63.112 +	int d = slit->localities;
  63.113 +	for (i = 0; i < d; i++) {
  63.114 +		for (j = 0; j < d; j++)  {
  63.115 +			u8 val = slit->entry[d*i + j];
  63.116 +			if (i == j) {
  63.117 +				if (val != 10)
  63.118 +					return 0;
  63.119 +			} else if (val <= 10)
  63.120 +				return 0;
  63.121 +		}
  63.122 +	}
  63.123 +	return 1;
  63.124 +}
  63.125 +
  63.126 +/* Callback for SLIT parsing */
  63.127 +void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
  63.128 +{
  63.129 +	if (!slit_valid(slit)) {
  63.130 +		printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
  63.131 +		return;
  63.132 +	}
  63.133 +	acpi_slit = slit;
  63.134 +}
  63.135 +
  63.136 +/* Callback for Proximity Domain -> LAPIC mapping */
  63.137 +void __init
  63.138 +acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
  63.139 +{
  63.140 +	int pxm, node;
  63.141 +	if (srat_disabled())
  63.142 +		return;
  63.143 +	if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) {		bad_srat();
  63.144 +		return;
  63.145 +	}
  63.146 +	if (pa->flags.enabled == 0)
  63.147 +		return;
  63.148 +	pxm = pa->proximity_domain;
  63.149 +	node = setup_node(pxm);
  63.150 +	if (node < 0) {
  63.151 +		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
  63.152 +		bad_srat();
  63.153 +		return;
  63.154 +	}
  63.155 +	apicid_to_node[pa->apic_id] = node;
  63.156 +	acpi_numa = 1;
  63.157 +	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
  63.158 +	       pxm, pa->apic_id, node);
  63.159 +}
  63.160 +
  63.161 +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
  63.162 +void __init
  63.163 +acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
  63.164 +{
  63.165 +	struct node *nd;
  63.166 +	u64 start, end;
  63.167 +	int node, pxm;
  63.168 +	int i;
  63.169 +
  63.170 +	if (srat_disabled())
  63.171 +		return;
  63.172 +	if (ma->header.length != sizeof(struct acpi_table_memory_affinity)) {
  63.173 +		bad_srat();
  63.174 +		return;
  63.175 +	}
  63.176 +	if (ma->flags.enabled == 0)
  63.177 +		return;
  63.178 +	start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
  63.179 +	end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
  63.180 +	pxm = ma->proximity_domain;
  63.181 +	node = setup_node(pxm);
  63.182 +	if (node < 0) {
  63.183 +		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
  63.184 +		bad_srat();
  63.185 +		return;
  63.186 +	}
   63.187 +	/* It is fine to add this area to the node's data; it will be used later. */
  63.188 +	if (ma->flags.hot_pluggable == 1)
  63.189 +		printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
  63.190 +				start, end);
  63.191 +	i = conflicting_nodes(start, end);
  63.192 +	if (i == node) {
  63.193 +		printk(KERN_WARNING
  63.194 +		"SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
  63.195 +		PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end);
  63.196 +	} else if (i >= 0) {
  63.197 +		printk(KERN_ERR
  63.198 +		       "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%"
  63.199 +		       PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i),
  63.200 +			   nodes[i].start, nodes[i].end);
  63.201 +		bad_srat();
  63.202 +		return;
  63.203 +	}
  63.204 +	nd = &nodes[node];
  63.205 +	if (!node_test_and_set(node, nodes_parsed)) {
  63.206 +		nd->start = start;
  63.207 +		nd->end = end;
  63.208 +	} else {
  63.209 +		if (start < nd->start)
  63.210 +			nd->start = start;
  63.211 +		if (nd->end < end)
  63.212 +			nd->end = end;
  63.213 +	}
  63.214 +	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
  63.215 +	       nd->start, nd->end);
  63.216 +}
  63.217 +
  63.218 +/* Sanity check to catch more bad SRATs (they are amazingly common).
  63.219 +   Make sure the PXMs cover all memory. */
  63.220 +static int nodes_cover_memory(void)
  63.221 +{
  63.222 +	int i;
  63.223 +	u64 pxmram, e820ram;
  63.224 +
  63.225 +	pxmram = 0;
  63.226 +	for_each_node_mask(i, nodes_parsed) {
  63.227 +		u64 s = nodes[i].start >> PAGE_SHIFT;
  63.228 +		u64 e = nodes[i].end >> PAGE_SHIFT;
  63.229 +		pxmram += e - s;
  63.230 +	}
  63.231 +
  63.232 +	e820ram = max_page;
  63.233 +	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
  63.234 +	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
  63.235 +		printk(KERN_ERR "SRAT: PXMs only cover %"PRIu64"MB of your %"
  63.236 +			PRIu64"MB e820 RAM. Not used.\n",
  63.237 +			(pxmram << PAGE_SHIFT) >> 20,
  63.238 +			(e820ram << PAGE_SHIFT) >> 20);
  63.239 +		return 0;
  63.240 +	}
  63.241 +	return 1;
  63.242 +}
  63.243 +
  63.244 +static void unparse_node(int node)
  63.245 +{
  63.246 +	int i;
  63.247 +	node_clear(node, nodes_parsed);
  63.248 +	for (i = 0; i < MAX_LOCAL_APIC; i++) {
  63.249 +		if (apicid_to_node[i] == node)
  63.250 +			apicid_to_node[i] = NUMA_NO_NODE;
  63.251 +	}
  63.252 +}
  63.253 +
  63.254 +void __init acpi_numa_arch_fixup(void) {}
  63.255 +
  63.256 +/* Use the information discovered above to actually set up the nodes. */
  63.257 +int __init acpi_scan_nodes(u64 start, u64 end)
  63.258 +{
  63.259 +	int i;
  63.260 +
  63.261 +	/* First clean up the node list */
  63.262 +	for (i = 0; i < MAX_NUMNODES; i++) {
  63.263 +		cutoff_node(i, start, end);
  63.264 +		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
  63.265 +			unparse_node(i);
  63.266 +	}
  63.267 +
  63.268 +	if (acpi_numa <= 0)
  63.269 +		return -1;
  63.270 +
  63.271 +	if (!nodes_cover_memory()) {
  63.272 +		bad_srat();
  63.273 +		return -1;
  63.274 +	}
  63.275 +
  63.276 +	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
  63.277 +	if (memnode_shift < 0) {
  63.278 +		printk(KERN_ERR
  63.279 +		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
  63.280 +		bad_srat();
  63.281 +		return -1;
  63.282 +	}
  63.283 +
  63.284 +	/* Finally register nodes */
  63.285 +	for_each_node_mask(i, nodes_parsed)
  63.286 +		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
  63.287 +	for (i = 0; i < NR_CPUS; i++) { 
  63.288 +		if (cpu_to_node[i] == NUMA_NO_NODE)
  63.289 +			continue;
  63.290 +		if (!node_isset(cpu_to_node[i], nodes_parsed))
  63.291 +			numa_set_node(i, NUMA_NO_NODE);
  63.292 +	}
  63.293 +	numa_init_array();
  63.294 +	return 0;
  63.295 +}
  63.296 +
  63.297 +static int node_to_pxm(int n)
  63.298 +{
  63.299 +       int i;
  63.300 +       if (pxm2node[n] == n)
  63.301 +               return n;
  63.302 +       for (i = 0; i < 256; i++)
  63.303 +               if (pxm2node[i] == n)
  63.304 +                       return i;
  63.305 +       return 0;
  63.306 +}
  63.307 +
  63.308 +int __node_distance(int a, int b)
  63.309 +{
  63.310 +	int index;
  63.311 +
  63.312 +	if (!acpi_slit)
  63.313 +		return a == b ? 10 : 20;
  63.314 +	index = acpi_slit->localities * node_to_pxm(a);
  63.315 +	return acpi_slit->entry[index + node_to_pxm(b)];
  63.316 +}
  63.317 +
  63.318 +EXPORT_SYMBOL(__node_distance);
    64.1 --- a/xen/common/memory.c	Tue Oct 24 11:21:48 2006 -0600
    64.2 +++ b/xen/common/memory.c	Wed Oct 25 12:12:01 2006 -0600
    64.3 @@ -41,6 +41,8 @@ increase_reservation(
    64.4      struct page_info *page;
    64.5      unsigned long i;
    64.6      xen_pfn_t mfn;
    64.7 +    /* use domain's first processor for locality parameter */
    64.8 +    unsigned int cpu = d->vcpu[0]->processor;
    64.9  
   64.10      if ( !guest_handle_is_null(extent_list) &&
   64.11           !guest_handle_okay(extent_list, nr_extents) )
   64.12 @@ -58,8 +60,8 @@ increase_reservation(
   64.13              return i;
   64.14          }
   64.15  
   64.16 -        if ( unlikely((page = alloc_domheap_pages(
   64.17 -            d, extent_order, memflags)) == NULL) )
   64.18 +        if ( unlikely((page = __alloc_domheap_pages( d, cpu, 
   64.19 +            extent_order, memflags )) == NULL) ) 
   64.20          {
   64.21              DPRINTK("Could not allocate order=%d extent: "
   64.22                      "id=%d memflags=%x (%ld of %d)\n",
   64.23 @@ -92,6 +94,8 @@ populate_physmap(
   64.24      unsigned long i, j;
   64.25      xen_pfn_t gpfn;
   64.26      xen_pfn_t mfn;
   64.27 +    /* use domain's first processor for locality parameter */
   64.28 +    unsigned int cpu = d->vcpu[0]->processor;
   64.29  
   64.30      if ( !guest_handle_okay(extent_list, nr_extents) )
   64.31          return 0;
   64.32 @@ -111,8 +115,8 @@ populate_physmap(
   64.33          if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
   64.34              goto out;
   64.35  
   64.36 -        if ( unlikely((page = alloc_domheap_pages(
   64.37 -            d, extent_order, memflags)) == NULL) )
   64.38 +        if ( unlikely((page = __alloc_domheap_pages( d, cpu, 
   64.39 +            extent_order, memflags )) == NULL) ) 
   64.40          {
   64.41              DPRINTK("Could not allocate order=%d extent: "
   64.42                      "id=%d memflags=%x (%ld of %d)\n",
   64.43 @@ -294,7 +298,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
   64.44      unsigned long in_chunk_order, out_chunk_order;
   64.45      xen_pfn_t     gpfn, gmfn, mfn;
   64.46      unsigned long i, j, k;
   64.47 -    unsigned int  memflags = 0;
   64.48 +    unsigned int  memflags = 0, cpu;
   64.49      long          rc = 0;
   64.50      struct domain *d;
   64.51      struct page_info *page;
   64.52 @@ -368,6 +372,9 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
   64.53      }
   64.54      d = current->domain;
   64.55  
   64.56 +    /* use domain's first processor for locality parameter */
   64.57 +    cpu = d->vcpu[0]->processor;
   64.58 +
   64.59      for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
   64.60      {
   64.61          if ( hypercall_preempt_check() )
   64.62 @@ -413,8 +420,8 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
   64.63          /* Allocate a chunk's worth of anonymous output pages. */
   64.64          for ( j = 0; j < (1UL << out_chunk_order); j++ )
   64.65          {
   64.66 -            page = alloc_domheap_pages(
   64.67 -                NULL, exch.out.extent_order, memflags);
   64.68 +            page = __alloc_domheap_pages( NULL, cpu, 
   64.69 +                  exch.out.extent_order, memflags);
   64.70              if ( unlikely(page == NULL) )
   64.71              {
   64.72                  rc = -ENOMEM;
    65.1 --- a/xen/common/page_alloc.c	Tue Oct 24 11:21:48 2006 -0600
    65.2 +++ b/xen/common/page_alloc.c	Wed Oct 25 12:12:01 2006 -0600
    65.3 @@ -4,6 +4,7 @@
    65.4   * Simple buddy heap allocator for Xen.
    65.5   * 
    65.6   * Copyright (c) 2002-2004 K A Fraser
    65.7 + * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
    65.8   * 
    65.9   * This program is free software; you can redistribute it and/or modify
   65.10   * it under the terms of the GNU General Public License as published by
   65.11 @@ -33,6 +34,8 @@
   65.12  #include <xen/domain_page.h>
   65.13  #include <xen/keyhandler.h>
   65.14  #include <xen/perfc.h>
   65.15 +#include <xen/numa.h>
   65.16 +#include <xen/nodemask.h>
   65.17  #include <asm/page.h>
   65.18  
   65.19  /*
   65.20 @@ -247,22 +250,23 @@ unsigned long alloc_boot_pages(unsigned 
   65.21  #define pfn_dom_zone_type(_pfn)                                 \
   65.22      (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
   65.23  
   65.24 -static struct list_head heap[NR_ZONES][MAX_ORDER+1];
   65.25 +static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];
   65.26  
   65.27 -static unsigned long avail[NR_ZONES];
   65.28 +static unsigned long avail[NR_ZONES][MAX_NUMNODES];
   65.29  
   65.30  static DEFINE_SPINLOCK(heap_lock);
   65.31  
   65.32  void end_boot_allocator(void)
   65.33  {
   65.34 -    unsigned long i, j;
   65.35 +    unsigned long i, j, k;
   65.36      int curr_free = 0, next_free = 0;
   65.37  
   65.38      memset(avail, 0, sizeof(avail));
   65.39  
   65.40      for ( i = 0; i < NR_ZONES; i++ )
   65.41 -        for ( j = 0; j <= MAX_ORDER; j++ )
   65.42 -            INIT_LIST_HEAD(&heap[i][j]);
   65.43 +        for ( j = 0; j < MAX_NUMNODES; j++ )
   65.44 +            for ( k = 0; k <= MAX_ORDER; k++ )
   65.45 +                INIT_LIST_HEAD(&heap[i][j][k]);
   65.46  
   65.47      /* Pages that are free now go to the domain sub-allocator. */
   65.48      for ( i = 0; i < max_page; i++ )
   65.49 @@ -272,29 +276,59 @@ void end_boot_allocator(void)
   65.50          if ( next_free )
   65.51              map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
   65.52          if ( curr_free )
   65.53 -            free_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 0);
   65.54 +            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
   65.55      }
   65.56  }
   65.57  
   65.58 -/* Hand the specified arbitrary page range to the specified heap zone. */
   65.59 +/* 
   65.60 + * Hand the specified arbitrary page range to the specified heap zone
   65.61 + * checking the node_id of the previous page.  If they differ and the
   65.62 + * latter is not on a MAX_ORDER boundary, then we reserve the page by
   65.63 + * not freeing it to the buddy allocator.
   65.64 + */
   65.65 +#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
   65.66  void init_heap_pages(
   65.67      unsigned int zone, struct page_info *pg, unsigned long nr_pages)
   65.68  {
   65.69 +    unsigned int nid_curr,nid_prev;
   65.70      unsigned long i;
   65.71  
   65.72      ASSERT(zone < NR_ZONES);
   65.73  
   65.74 +    if ( likely(page_to_mfn(pg) != 0) )
   65.75 +        nid_prev = phys_to_nid(page_to_maddr(pg-1));
   65.76 +    else
   65.77 +        nid_prev = phys_to_nid(page_to_maddr(pg));
   65.78 +
   65.79      for ( i = 0; i < nr_pages; i++ )
   65.80 -        free_heap_pages(zone, pg+i, 0);
   65.81 +    {
   65.82 +        nid_curr = phys_to_nid(page_to_maddr(pg+i));
   65.83 +
   65.84 +        /*
   65.85 +         * free pages of the same node, or if they differ, but are on a
   65.86 +         * MAX_ORDER alignment boundary (which already get reserved)
   65.87 +         */
   65.88 +         if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
   65.89 +                                         MAX_ORDER_ALIGNED) )
   65.90 +             free_heap_pages(zone, pg+i, 0);
   65.91 +         else
   65.92 +             printk("Reserving non-aligned node boundary @ mfn %lu\n",
   65.93 +                    page_to_mfn(pg+i));
   65.94 +
   65.95 +        nid_prev = nid_curr;
   65.96 +    }
   65.97  }
   65.98  
   65.99 -
  65.100  /* Allocate 2^@order contiguous pages. */
  65.101 -struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
  65.102 +struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
  65.103 +                                   unsigned int order)
  65.104  {
  65.105 -    int i;
  65.106 +    unsigned int i,j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
  65.107 +    unsigned int request = (1UL << order);
  65.108      struct page_info *pg;
  65.109  
  65.110 +    ASSERT(node >= 0);
  65.111 +    ASSERT(node < num_nodes);
  65.112      ASSERT(zone < NR_ZONES);
  65.113  
  65.114      if ( unlikely(order > MAX_ORDER) )
  65.115 @@ -302,29 +336,46 @@ struct page_info *alloc_heap_pages(unsig
  65.116  
  65.117      spin_lock(&heap_lock);
  65.118  
  65.119 -    /* Find smallest order which can satisfy the request. */
  65.120 -    for ( i = order; i <= MAX_ORDER; i++ )
  65.121 -        if ( !list_empty(&heap[zone][i]) )
  65.122 -            goto found;
  65.123 +    /* start with requested node, but exhaust all node memory
  65.124 +     * in requested zone before failing, only calc new node
  65.125 +     * value if we fail to find memory in target node, this avoids
  65.126 +     * needless computation on fast-path */
  65.127 +    for ( i = 0; i < num_nodes; i++ )
  65.128 +    {
  65.129 +        /* check if target node can support the allocation */
  65.130 +        if ( avail[zone][node] >= request )
  65.131 +        {
  65.132 +            /* Find smallest order which can satisfy the request. */
  65.133 +            for ( j = order; j <= MAX_ORDER; j++ )
  65.134 +            {
  65.135 +                if ( !list_empty(&heap[zone][node][j]) )
  65.136 +                    goto found;
  65.137 +            }
  65.138 +        }
  65.139 +        /* pick next node, wrapping around if needed */
  65.140 +        if ( ++node == num_nodes )
  65.141 +            node = 0;
  65.142 +    }
  65.143  
  65.144      /* No suitable memory blocks. Fail the request. */
  65.145      spin_unlock(&heap_lock);
  65.146      return NULL;
  65.147  
  65.148   found: 
  65.149 -    pg = list_entry(heap[zone][i].next, struct page_info, list);
  65.150 +    pg = list_entry(heap[zone][node][j].next, struct page_info, list);
  65.151      list_del(&pg->list);
  65.152  
  65.153      /* We may have to halve the chunk a number of times. */
  65.154 -    while ( i != order )
  65.155 +    while ( j != order )
  65.156      {
  65.157 -        PFN_ORDER(pg) = --i;
  65.158 -        list_add_tail(&pg->list, &heap[zone][i]);
  65.159 -        pg += 1 << i;
  65.160 +        PFN_ORDER(pg) = --j;
  65.161 +        list_add_tail(&pg->list, &heap[zone][node][j]);
  65.162 +        pg += 1 << j;
  65.163      }
  65.164      
  65.165 -    map_alloc(page_to_mfn(pg), 1 << order);
  65.166 -    avail[zone] -= 1 << order;
  65.167 +    map_alloc(page_to_mfn(pg), request);
  65.168 +    ASSERT(avail[zone][node] >= request);
  65.169 +    avail[zone][node] -= request;
  65.170  
  65.171      spin_unlock(&heap_lock);
  65.172  
  65.173 @@ -337,14 +388,17 @@ void free_heap_pages(
  65.174      unsigned int zone, struct page_info *pg, unsigned int order)
  65.175  {
  65.176      unsigned long mask;
  65.177 +    int node = phys_to_nid(page_to_maddr(pg));
  65.178  
  65.179      ASSERT(zone < NR_ZONES);
  65.180      ASSERT(order <= MAX_ORDER);
  65.181 +    ASSERT(node >= 0);
  65.182 +    ASSERT(node < num_online_nodes());
  65.183  
  65.184      spin_lock(&heap_lock);
  65.185  
  65.186      map_free(page_to_mfn(pg), 1 << order);
  65.187 -    avail[zone] += 1 << order;
  65.188 +    avail[zone][node] += 1 << order;
  65.189      
  65.190      /* Merge chunks as far as possible. */
  65.191      while ( order < MAX_ORDER )
  65.192 @@ -370,10 +424,13 @@ void free_heap_pages(
  65.193          }
  65.194          
  65.195          order++;
  65.196 +
  65.197 +        /* after merging, pg should be in the same node */
  65.198 +        ASSERT(phys_to_nid(page_to_maddr(pg)) == node );
  65.199      }
  65.200  
  65.201      PFN_ORDER(pg) = order;
  65.202 -    list_add_tail(&pg->list, &heap[zone][order]);
  65.203 +    list_add_tail(&pg->list, &heap[zone][node][order]);
  65.204  
  65.205      spin_unlock(&heap_lock);
  65.206  }
  65.207 @@ -466,7 +523,7 @@ void *alloc_xenheap_pages(unsigned int o
  65.208      int i;
  65.209  
  65.210      local_irq_save(flags);
  65.211 -    pg = alloc_heap_pages(MEMZONE_XEN, order);
  65.212 +    pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
  65.213      local_irq_restore(flags);
  65.214  
  65.215      if ( unlikely(pg == NULL) )
  65.216 @@ -580,8 +637,9 @@ int assign_pages(
  65.217  }
  65.218  
  65.219  
  65.220 -struct page_info *alloc_domheap_pages(
  65.221 -    struct domain *d, unsigned int order, unsigned int memflags)
  65.222 +struct page_info *__alloc_domheap_pages(
  65.223 +    struct domain *d, unsigned int cpu, unsigned int order, 
  65.224 +    unsigned int memflags)
  65.225  {
  65.226      struct page_info *pg = NULL;
  65.227      cpumask_t mask;
  65.228 @@ -591,17 +649,17 @@ struct page_info *alloc_domheap_pages(
  65.229  
  65.230      if ( !(memflags & MEMF_dma) )
  65.231      {
  65.232 -        pg = alloc_heap_pages(MEMZONE_DOM, order);
  65.233 +        pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
  65.234          /* Failure? Then check if we can fall back to the DMA pool. */
  65.235          if ( unlikely(pg == NULL) &&
  65.236               ((order > MAX_ORDER) ||
  65.237 -              (avail[MEMZONE_DMADOM] <
  65.238 +              (avail_heap_pages(MEMZONE_DMADOM,-1) <
  65.239                 (lowmem_emergency_pool_pages + (1UL << order)))) )
  65.240              return NULL;
  65.241      }
  65.242  
  65.243      if ( pg == NULL )
  65.244 -        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL )
  65.245 +        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
  65.246              return NULL;
  65.247  
  65.248      mask = pg->u.free.cpumask;
  65.249 @@ -640,6 +698,11 @@ struct page_info *alloc_domheap_pages(
  65.250      return pg;
  65.251  }
  65.252  
  65.253 +inline struct page_info *alloc_domheap_pages(
  65.254 +    struct domain *d, unsigned int order, unsigned int flags)
  65.255 +{
  65.256 +    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
  65.257 +}
  65.258  
  65.259  void free_domheap_pages(struct page_info *pg, unsigned int order)
  65.260  {
  65.261 @@ -714,13 +777,27 @@ void free_domheap_pages(struct page_info
  65.262  }
  65.263  
  65.264  
  65.265 +unsigned long avail_heap_pages(int zone, int node)
  65.266 +{
  65.267 +    int i,j, num_nodes = num_online_nodes();
  65.268 +    unsigned long free_pages = 0;
  65.269 +   
  65.270 +    for (i=0; i<NR_ZONES; i++)
  65.271 +        if ( (zone == -1) || (zone == i) )
  65.272 +            for (j=0; j < num_nodes; j++)
  65.273 +                if ( (node == -1) || (node == j) )
  65.274 +                    free_pages += avail[i][j];            
  65.275 +
  65.276 +    return free_pages;
  65.277 +}
  65.278 +
  65.279  unsigned long avail_domheap_pages(void)
  65.280  {
  65.281      unsigned long avail_nrm, avail_dma;
  65.282 +    
  65.283 +    avail_nrm = avail_heap_pages(MEMZONE_DOM,-1);
  65.284  
  65.285 -    avail_nrm = avail[MEMZONE_DOM];
  65.286 -
  65.287 -    avail_dma = avail[MEMZONE_DMADOM];
  65.288 +    avail_dma = avail_heap_pages(MEMZONE_DMADOM,-1);
  65.289      if ( avail_dma > lowmem_emergency_pool_pages )
  65.290          avail_dma -= lowmem_emergency_pool_pages;
  65.291      else
  65.292 @@ -729,6 +806,10 @@ unsigned long avail_domheap_pages(void)
  65.293      return avail_nrm + avail_dma;
  65.294  }
  65.295  
  65.296 +unsigned long avail_nodeheap_pages(int node)
  65.297 +{
  65.298 +    return avail_heap_pages(-1, node);
  65.299 +}
  65.300  
  65.301  static void pagealloc_keyhandler(unsigned char key)
  65.302  {
  65.303 @@ -736,9 +817,9 @@ static void pagealloc_keyhandler(unsigne
  65.304      printk("    Xen heap: %lukB free\n"
  65.305             "    DMA heap: %lukB free\n"
  65.306             "    Dom heap: %lukB free\n",
  65.307 -           avail[MEMZONE_XEN]<<(PAGE_SHIFT-10),
  65.308 -           avail[MEMZONE_DMADOM]<<(PAGE_SHIFT-10),
  65.309 -           avail[MEMZONE_DOM]<<(PAGE_SHIFT-10));
  65.310 +           avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10), 
  65.311 +           avail_heap_pages(MEMZONE_DMADOM, -1) <<(PAGE_SHIFT-10), 
  65.312 +           avail_heap_pages(MEMZONE_DOM, -1) <<(PAGE_SHIFT-10));
  65.313  }
  65.314  
  65.315  
  65.316 @@ -806,6 +887,46 @@ unsigned long avail_scrub_pages(void)
  65.317      return scrub_pages;
  65.318  }
  65.319  
  65.320 +static unsigned long count_bucket(struct list_head* l, int order)
  65.321 +{
  65.322 +    unsigned long total_pages = 0;
  65.323 +    int pages = 1 << order;
  65.324 +    struct page_info *pg;
  65.325 +
  65.326 +    list_for_each_entry(pg, l, list)
  65.327 +        total_pages += pages;
  65.328 +
  65.329 +    return total_pages;
  65.330 +}
  65.331 +
  65.332 +static void dump_heap(unsigned char key)
  65.333 +{
  65.334 +    s_time_t       now = NOW();
  65.335 +    int i,j,k;
  65.336 +    unsigned long total;
  65.337 +
  65.338 +    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
  65.339 +           (u32)(now>>32), (u32)now);
  65.340 +
  65.341 +    for (i=0; i<NR_ZONES; i++ )
  65.342 +        for (j=0;j<MAX_NUMNODES;j++)
  65.343 +            for (k=0;k<=MAX_ORDER;k++)
  65.344 +                if ( !list_empty(&heap[i][j][k]) )
  65.345 +                {
  65.346 +                    total = count_bucket(&heap[i][j][k], k);
  65.347 +                    printk("heap[%d][%d][%d]-> %lu pages\n",
  65.348 +                            i, j, k, total);
  65.349 +                }
  65.350 +}
  65.351 +
  65.352 +static __init int register_heap_trigger(void)
  65.353 +{
  65.354 +    register_keyhandler('H', dump_heap, "dump heap info");
  65.355 +    return 0;
  65.356 +}
  65.357 +__initcall(register_heap_trigger);
  65.358 +
  65.359 +
  65.360  static __init int page_scrub_init(void)
  65.361  {
  65.362      open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    66.1 --- a/xen/drivers/acpi/Makefile	Tue Oct 24 11:21:48 2006 -0600
    66.2 +++ b/xen/drivers/acpi/Makefile	Wed Oct 25 12:12:01 2006 -0600
    66.3 @@ -1,1 +1,2 @@
    66.4  obj-y += tables.o
    66.5 +obj-y += numa.o
    67.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    67.2 +++ b/xen/drivers/acpi/numa.c	Wed Oct 25 12:12:01 2006 -0600
    67.3 @@ -0,0 +1,216 @@
    67.4 +/*
    67.5 + *  acpi_numa.c - ACPI NUMA support
    67.6 + *
    67.7 + *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
    67.8 + *
    67.9 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   67.10 + *
   67.11 + *  This program is free software; you can redistribute it and/or modify
   67.12 + *  it under the terms of the GNU General Public License as published by
   67.13 + *  the Free Software Foundation; either version 2 of the License, or
   67.14 + *  (at your option) any later version.
   67.15 + *
   67.16 + *  This program is distributed in the hope that it will be useful,
   67.17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   67.18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   67.19 + *  GNU General Public License for more details.
   67.20 + *
   67.21 + *  You should have received a copy of the GNU General Public License
   67.22 + *  along with this program; if not, write to the Free Software
   67.23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   67.24 + *
   67.25 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   67.26 + *
   67.27 + */
   67.28 +#if 0
   67.29 +#include <linux/module.h>
   67.30 +#include <linux/kernel.h>
   67.31 +#endif
   67.32 +#include <xen/config.h>
   67.33 +#include <xen/init.h>
   67.34 +#include <xen/types.h>
   67.35 +#include <xen/errno.h>
   67.36 +#include <xen/acpi.h>
   67.37 +#include <xen/numa.h>
   67.38 +#include <acpi/acpi_bus.h>
   67.39 +#include <acpi/acmacros.h>
   67.40 +#include <asm/page.h> /* __va() */
   67.41 +
   67.42 +#define ACPI_NUMA	0x80000000
   67.43 +#define _COMPONENT	ACPI_NUMA
   67.44 +ACPI_MODULE_NAME("numa")
   67.45 +
   67.46 +extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
   67.47 +					       unsigned long madt_size,
   67.48 +					       int entry_id,
   67.49 +					       acpi_madt_entry_handler handler,
   67.50 +					       unsigned int max_entries);
   67.51 +
   67.52 +void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
   67.53 +{
   67.54 +
   67.55 +	ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
   67.56 +
   67.57 +	if (!header)
   67.58 +		return;
   67.59 +
   67.60 +	switch (header->type) {
   67.61 +
   67.62 +	case ACPI_SRAT_PROCESSOR_AFFINITY:
   67.63 +#ifdef ACPI_DEBUG_OUTPUT
   67.64 +		{
   67.65 +			struct acpi_table_processor_affinity *p =
   67.66 +			    (struct acpi_table_processor_affinity *)header;
   67.67 +			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
   67.68 +					  "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
   67.69 +					  p->apic_id, p->lsapic_eid,
   67.70 +					  p->proximity_domain,
   67.71 +					  p->flags.
   67.72 +					  enabled ? "enabled" : "disabled"));
   67.73 +		}
   67.74 +#endif				/* ACPI_DEBUG_OUTPUT */
   67.75 +		break;
   67.76 +
   67.77 +	case ACPI_SRAT_MEMORY_AFFINITY:
   67.78 +#ifdef ACPI_DEBUG_OUTPUT
   67.79 +		{
   67.80 +			struct acpi_table_memory_affinity *p =
   67.81 +			    (struct acpi_table_memory_affinity *)header;
   67.82 +			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
   67.83 +					  "SRAT Memory (0x%08x%08x length 0x%08x%08x type 0x%x) in proximity domain %d %s%s\n",
   67.84 +					  p->base_addr_hi, p->base_addr_lo,
   67.85 +					  p->length_hi, p->length_lo,
   67.86 +					  p->memory_type, p->proximity_domain,
   67.87 +					  p->flags.
   67.88 +					  enabled ? "enabled" : "disabled",
   67.89 +					  p->flags.
   67.90 +					  hot_pluggable ? " hot-pluggable" :
   67.91 +					  ""));
   67.92 +		}
   67.93 +#endif				/* ACPI_DEBUG_OUTPUT */
   67.94 +		break;
   67.95 +
   67.96 +	default:
   67.97 +		printk(KERN_WARNING PREFIX
   67.98 +		       "Found unsupported SRAT entry (type = 0x%x)\n",
   67.99 +		       header->type);
  67.100 +		break;
  67.101 +	}
  67.102 +}
  67.103 +
  67.104 +static int __init acpi_parse_slit(unsigned long phys_addr, unsigned long size)
  67.105 +{
  67.106 +	struct acpi_table_slit *slit;
  67.107 +	u32 localities;
  67.108 +
  67.109 +	if (!phys_addr || !size)
  67.110 +		return -EINVAL;
  67.111 +
  67.112 +	slit = (struct acpi_table_slit *)__va(phys_addr);
  67.113 +
  67.114 +	/* downcast just for %llu vs %lu for i386/ia64  */
  67.115 +	localities = (u32) slit->localities;
  67.116 +
  67.117 +	acpi_numa_slit_init(slit);
  67.118 +
  67.119 +	return 0;
  67.120 +}
  67.121 +
  67.122 +static int __init
  67.123 +acpi_parse_processor_affinity(acpi_table_entry_header * header,
  67.124 +			      const unsigned long end)
  67.125 +{
  67.126 +	struct acpi_table_processor_affinity *processor_affinity;
  67.127 +
  67.128 +	processor_affinity = (struct acpi_table_processor_affinity *)header;
  67.129 +	if (!processor_affinity)
  67.130 +		return -EINVAL;
  67.131 +
  67.132 +	acpi_table_print_srat_entry(header);
  67.133 +
  67.134 +	/* let architecture-dependent part to do it */
  67.135 +	acpi_numa_processor_affinity_init(processor_affinity);
  67.136 +
  67.137 +	return 0;
  67.138 +}
  67.139 +
  67.140 +static int __init
  67.141 +acpi_parse_memory_affinity(acpi_table_entry_header * header,
  67.142 +			   const unsigned long end)
  67.143 +{
  67.144 +	struct acpi_table_memory_affinity *memory_affinity;
  67.145 +
  67.146 +	memory_affinity = (struct acpi_table_memory_affinity *)header;
  67.147 +	if (!memory_affinity)
  67.148 +		return -EINVAL;
  67.149 +
  67.150 +	acpi_table_print_srat_entry(header);
  67.151 +
  67.152 +	/* let architecture-dependent part to do it */
  67.153 +	acpi_numa_memory_affinity_init(memory_affinity);
  67.154 +
  67.155 +	return 0;
  67.156 +}
  67.157 +
  67.158 +static int __init acpi_parse_srat(unsigned long phys_addr, unsigned long size)
  67.159 +{
  67.160 +	struct acpi_table_srat *srat;
  67.161 +
  67.162 +	if (!phys_addr || !size)
  67.163 +		return -EINVAL;
  67.164 +
  67.165 +	srat = (struct acpi_table_srat *)__va(phys_addr);
  67.166 +
  67.167 +	return 0;
  67.168 +}
  67.169 +
  67.170 +int __init
  67.171 +acpi_table_parse_srat(enum acpi_srat_entry_id id,
  67.172 +		      acpi_madt_entry_handler handler, unsigned int max_entries)
  67.173 +{
  67.174 +	return acpi_table_parse_madt_family(ACPI_SRAT,
  67.175 +					    sizeof(struct acpi_table_srat), id,
  67.176 +					    handler, max_entries);
  67.177 +}
  67.178 +
  67.179 +int __init acpi_numa_init(void)
  67.180 +{
  67.181 +	int result;
  67.182 +
  67.183 +	/* SRAT: Static Resource Affinity Table */
  67.184 +	result = acpi_table_parse(ACPI_SRAT, acpi_parse_srat);
  67.185 +
  67.186 +	if (result > 0) {
  67.187 +		result = acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
  67.188 +					       acpi_parse_processor_affinity,
  67.189 +					       NR_CPUS);
  67.190 +		result = acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS);	// IA64 specific
  67.191 +	}
  67.192 +
  67.193 +	/* SLIT: System Locality Information Table */
  67.194 +	result = acpi_table_parse(ACPI_SLIT, acpi_parse_slit);
  67.195 +
  67.196 +	acpi_numa_arch_fixup();
  67.197 +	return 0;
  67.198 +}
  67.199 +
  67.200 +#if 0
  67.201 +int acpi_get_pxm(acpi_handle h)
  67.202 +{
  67.203 +	unsigned long pxm;
  67.204 +	acpi_status status;
  67.205 +	acpi_handle handle;
  67.206 +	acpi_handle phandle = h;
  67.207 +
  67.208 +	do {
  67.209 +		handle = phandle;
  67.210 +		status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
  67.211 +		if (ACPI_SUCCESS(status))
  67.212 +			return (int)pxm;
  67.213 +		status = acpi_get_parent(handle, &phandle);
  67.214 +	} while (ACPI_SUCCESS(status));
  67.215 +	return -1;
  67.216 +}
  67.217 +
  67.218 +EXPORT_SYMBOL(acpi_get_pxm);
  67.219 +#endif
    68.1 --- a/xen/include/asm-ia64/vmx_platform.h	Tue Oct 24 11:21:48 2006 -0600
    68.2 +++ b/xen/include/asm-ia64/vmx_platform.h	Wed Oct 25 12:12:01 2006 -0600
    68.3 @@ -24,6 +24,8 @@
    68.4  #include <asm/hvm/vioapic.h>
    68.5  struct mmio_list;
    68.6  typedef struct virtual_platform_def {
    68.7 +    unsigned long          buffered_io_va;
    68.8 +    spinlock_t             buffered_io_lock;
    68.9      unsigned long       shared_page_va;
   68.10      unsigned long       pib_base;
   68.11      unsigned char       xtp;
    69.1 --- a/xen/include/asm-x86/acpi.h	Tue Oct 24 11:21:48 2006 -0600
    69.2 +++ b/xen/include/asm-x86/acpi.h	Wed Oct 25 12:12:01 2006 -0600
    69.3 @@ -157,6 +157,9 @@ static inline void check_acpi_pci(void) 
    69.4  
    69.5  static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
    69.6  static inline int acpi_irq_balance_set(char *str) { return 0; }
    69.7 +extern int acpi_scan_nodes(u64 start, u64 end);
    69.8 +extern int acpi_numa;
    69.9 +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
   69.10  
   69.11  #ifdef CONFIG_ACPI_SLEEP
   69.12  
   69.13 @@ -173,5 +176,6 @@ extern void acpi_reserve_bootmem(void);
   69.14  #endif /*CONFIG_ACPI_SLEEP*/
   69.15  
   69.16  extern u8 x86_acpiid_to_apicid[];
   69.17 +#define MAX_LOCAL_APIC 256
   69.18  
   69.19  #endif /*_ASM_ACPI_H*/
    70.1 --- a/xen/include/asm-x86/config.h	Tue Oct 24 11:21:48 2006 -0600
    70.2 +++ b/xen/include/asm-x86/config.h	Wed Oct 25 12:12:01 2006 -0600
    70.3 @@ -24,6 +24,11 @@
    70.4  #define CONFIG_X86_IO_APIC 1
    70.5  #define CONFIG_HPET_TIMER 1
    70.6  #define CONFIG_X86_MCE_P4THERMAL 1
    70.7 +#define CONFIG_ACPI_NUMA 1
    70.8 +#define CONFIG_NUMA 1
    70.9 +#define CONFIG_ACPI_SRAT 1
   70.10 +#define CONFIG_DISCONTIGMEM 1
   70.11 +#define CONFIG_NUMA_EMU 1
   70.12  
   70.13  /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
   70.14  #define CONFIG_X86_L1_CACHE_SHIFT 7
    71.1 --- a/xen/include/asm-x86/hvm/domain.h	Tue Oct 24 11:21:48 2006 -0600
    71.2 +++ b/xen/include/asm-x86/hvm/domain.h	Wed Oct 25 12:12:01 2006 -0600
    71.3 @@ -23,7 +23,7 @@
    71.4  #define __ASM_X86_HVM_DOMAIN_H__
    71.5  
    71.6  #include <asm/hvm/vpic.h>
    71.7 -#include <asm/hvm/vpit.h>
    71.8 +#include <asm/hvm/vpt.h>
    71.9  #include <asm/hvm/vlapic.h>
   71.10  #include <asm/hvm/vioapic.h>
   71.11  #include <public/hvm/params.h>
    72.1 --- a/xen/include/asm-x86/hvm/vpit.h	Tue Oct 24 11:21:48 2006 -0600
    72.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    72.3 @@ -1,139 +0,0 @@
    72.4 -/*
    72.5 - * vpit.h: Virtual PIT definitions
    72.6 - *
    72.7 - * Copyright (c) 2004, Intel Corporation.
    72.8 - *
    72.9 - * This program is free software; you can redistribute it and/or modify it
   72.10 - * under the terms and conditions of the GNU General Public License,
   72.11 - * version 2, as published by the Free Software Foundation.
   72.12 - *
   72.13 - * This program is distributed in the hope it will be useful, but WITHOUT
   72.14 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   72.15 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   72.16 - * more details.
   72.17 - *
   72.18 - * You should have received a copy of the GNU General Public License along with
   72.19 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   72.20 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   72.21 - */
   72.22 -
   72.23 -#ifndef __ASM_X86_HVM_VPIT_H__
   72.24 -#define __ASM_X86_HVM_VPIT_H__
   72.25 -
   72.26 -#include <xen/config.h>
   72.27 -#include <xen/init.h>
   72.28 -#include <xen/lib.h>
   72.29 -#include <xen/time.h>
   72.30 -#include <xen/errno.h>
   72.31 -#include <xen/time.h>
   72.32 -#include <xen/timer.h>
   72.33 -#include <asm/hvm/vpic.h>
   72.34 -
   72.35 -#define PIT_FREQ 1193181
   72.36 -#define PIT_BASE 0x40
   72.37 -
   72.38 -typedef struct PITChannelState {
   72.39 -    int count; /* can be 65536 */
   72.40 -    u16 latched_count;
   72.41 -    u8 count_latched;
   72.42 -    u8 status_latched;
   72.43 -    u8 status;
   72.44 -    u8 read_state;
   72.45 -    u8 write_state;
   72.46 -    u8 write_latch;
   72.47 -    u8 rw_mode;
   72.48 -    u8 mode;
   72.49 -    u8 bcd; /* not supported */
   72.50 -    u8 gate; /* timer start */
   72.51 -    s64 count_load_time;
   72.52 -    /* irq handling */
   72.53 -    struct vcpu      *vcpu;
   72.54 -    struct periodic_time *pt;
   72.55 -} PITChannelState;
   72.56 -
   72.57 -typedef struct PITState {
   72.58 -    PITChannelState channels[3];
   72.59 -    int speaker_data_on;
   72.60 -    int dummy_refresh_clock;
   72.61 -} PITState;
   72.62 -
   72.63 -#define RTC_SIZE 14
   72.64 -typedef struct RTCState {
   72.65 -    uint8_t cmos_data[RTC_SIZE];  /* Only handle time/interrupt part in HV */
   72.66 -    uint8_t cmos_index;
   72.67 -    struct tm current_tm;
   72.68 -    int irq;
   72.69 -    /* second update */
   72.70 -    int64_t next_second_time;
   72.71 -    struct timer second_timer;
   72.72 -    struct timer second_timer2;
   72.73 -    struct vcpu      *vcpu;
   72.74 -    struct periodic_time *pt;
   72.75 -} RTCState;
   72.76 -   
   72.77 -/*
   72.78 - * Abstract layer of periodic time, one short time.
   72.79 - */
   72.80 -typedef void time_cb(struct vcpu *v, void *opaque);
   72.81 -
   72.82 -struct periodic_time {
   72.83 -    char enabled;               /* enabled */
   72.84 -    char one_shot;              /* one shot time */
   72.85 -    char irq;
   72.86 -    char first_injected;        /* flag to prevent shadow window */
   72.87 -    u32 pending_intr_nr;        /* the couner for pending timer interrupts */
   72.88 -    u32 period;                 /* frequency in ns */
   72.89 -    u64 period_cycles;          /* frequency in cpu cycles */
   72.90 -    s_time_t scheduled;         /* scheduled timer interrupt */
   72.91 -    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
   72.92 -    struct timer timer;         /* ac_timer */
   72.93 -    time_cb *cb;
   72.94 -    void *priv;                 /* ponit back to platform time source */
   72.95 -};
   72.96 -
   72.97 -struct pl_time {    /* platform time */
   72.98 -    struct periodic_time periodic_tm;
   72.99 -    struct PITState      vpit;
  72.100 -    struct RTCState      vrtc;
  72.101 -    /* TODO: ACPI time */
  72.102 -};
  72.103 -
  72.104 -static __inline__ s_time_t get_scheduled(
  72.105 -    struct vcpu *v, int irq,
  72.106 -    struct periodic_time *pt)
  72.107 -{
  72.108 -    if ( is_irq_enabled(v, irq) ) {
  72.109 -        return pt->scheduled;
  72.110 -    }
  72.111 -    else
  72.112 -        return -1;
  72.113 -}
  72.114 -
  72.115 -extern u64 hvm_get_guest_time(struct vcpu *v);
  72.116 -/*
  72.117 - * get processor time.
  72.118 - * unit: TSC
  72.119 - */
  72.120 -static __inline__ int64_t hvm_get_clock(struct vcpu *v)
  72.121 -{
  72.122 -    uint64_t  gtsc;
  72.123 -
  72.124 -    gtsc = hvm_get_guest_time(v);
  72.125 -    return gtsc;
  72.126 -}
  72.127 -
  72.128 -#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
  72.129 -
  72.130 -/* to hook the ioreq packet to get the PIT initialization info */
  72.131 -extern void hvm_hooks_assist(struct vcpu *v);
  72.132 -extern void pickup_deactive_ticks(struct periodic_time *vpit);
  72.133 -extern struct periodic_time *create_periodic_time(u32 period, char irq, char one_shot, time_cb *cb, void *data);
  72.134 -extern void destroy_periodic_time(struct periodic_time *pt);
  72.135 -void pit_init(struct vcpu *v, unsigned long cpu_khz);
  72.136 -void rtc_init(struct vcpu *v, int base, int irq);
  72.137 -void rtc_deinit(struct domain *d);
  72.138 -int is_rtc_periodic_irq(void *opaque);
  72.139 -void pt_timer_fn(void *data);
  72.140 -void pit_time_fired(struct vcpu *v, void *priv);
  72.141 -
  72.142 -#endif /* __ASM_X86_HVM_VPIT_H__ */
    73.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    73.2 +++ b/xen/include/asm-x86/hvm/vpt.h	Wed Oct 25 12:12:01 2006 -0600
    73.3 @@ -0,0 +1,151 @@
    73.4 +/*
    73.5 + * vpt.h: Virtual Platform Timer definitions
    73.6 + *
    73.7 + * Copyright (c) 2004, Intel Corporation.
    73.8 + *
    73.9 + * This program is free software; you can redistribute it and/or modify it
   73.10 + * under the terms and conditions of the GNU General Public License,
   73.11 + * version 2, as published by the Free Software Foundation.
   73.12 + *
   73.13 + * This program is distributed in the hope it will be useful, but WITHOUT
   73.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   73.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   73.16 + * more details.
   73.17 + *
   73.18 + * You should have received a copy of the GNU General Public License along with
   73.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   73.20 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   73.21 + */
   73.22 +
   73.23 +#ifndef __ASM_X86_HVM_VPT_H__
   73.24 +#define __ASM_X86_HVM_VPT_H__
   73.25 +
   73.26 +#include <xen/config.h>
   73.27 +#include <xen/init.h>
   73.28 +#include <xen/lib.h>
   73.29 +#include <xen/time.h>
   73.30 +#include <xen/errno.h>
   73.31 +#include <xen/time.h>
   73.32 +#include <xen/timer.h>
   73.33 +#include <asm/hvm/vpic.h>
   73.34 +
   73.35 +#define PIT_FREQ 1193181
   73.36 +#define PIT_BASE 0x40
   73.37 +
   73.38 +typedef struct PITChannelState {
   73.39 +    int count; /* can be 65536 */
   73.40 +    u16 latched_count;
   73.41 +    u8 count_latched;
   73.42 +    u8 status_latched;
   73.43 +    u8 status;
   73.44 +    u8 read_state;
   73.45 +    u8 write_state;
   73.46 +    u8 write_latch;
   73.47 +    u8 rw_mode;
   73.48 +    u8 mode;
   73.49 +    u8 bcd; /* not supported */
   73.50 +    u8 gate; /* timer start */
   73.51 +    s64 count_load_time;
   73.52 +    /* irq handling */
   73.53 +    struct vcpu      *vcpu;
   73.54 +    struct periodic_time *pt;
   73.55 +} PITChannelState;
   73.56 +
   73.57 +typedef struct PITState {
   73.58 +    PITChannelState channels[3];
   73.59 +    int speaker_data_on;
   73.60 +    int dummy_refresh_clock;
   73.61 +} PITState;
   73.62 +
   73.63 +#define RTC_SIZE 14
   73.64 +typedef struct RTCState {
   73.65 +    uint8_t cmos_data[RTC_SIZE];  /* Only handle time/interrupt part in HV */
   73.66 +    uint8_t cmos_index;
   73.67 +    struct tm current_tm;
   73.68 +    int irq;
   73.69 +    /* second update */
   73.70 +    int64_t next_second_time;
   73.71 +    struct timer second_timer;
   73.72 +    struct timer second_timer2;
   73.73 +    struct vcpu      *vcpu;
   73.74 +    struct periodic_time *pt;
   73.75 +} RTCState;
   73.76 +
   73.77 +#define FREQUENCE_PMTIMER  3579545
   73.78 +typedef struct PMTState {
   73.79 +    uint32_t pm1_timer;
   73.80 +    uint32_t pm1_status;
   73.81 +    uint64_t last_gtime;
   73.82 +    struct timer timer;
   73.83 +    uint64_t scale;
   73.84 +    struct vcpu *vcpu;
   73.85 +} PMTState;
   73.86 +
   73.87 +/*
   73.88 + * Abstract layer of periodic time, one short time.
   73.89 + */
   73.90 +typedef void time_cb(struct vcpu *v, void *opaque);
   73.91 +
   73.92 +struct periodic_time {
   73.93 +    char enabled;               /* enabled */
   73.94 +    char one_shot;              /* one shot time */
   73.95 +    char irq;
   73.96 +    char first_injected;        /* flag to prevent shadow window */
   73.97 +    u32 pending_intr_nr;        /* the couner for pending timer interrupts */
   73.98 +    u32 period;                 /* frequency in ns */
   73.99 +    u64 period_cycles;          /* frequency in cpu cycles */
  73.100 +    s_time_t scheduled;         /* scheduled timer interrupt */
  73.101 +    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
  73.102 +    struct timer timer;         /* ac_timer */
  73.103 +    time_cb *cb;
  73.104 +    void *priv;                 /* ponit back to platform time source */
  73.105 +};
  73.106 +
  73.107 +struct pl_time {    /* platform time */
  73.108 +    struct periodic_time periodic_tm;
  73.109 +    struct PITState      vpit;
  73.110 +    struct RTCState      vrtc;
  73.111 +    struct PMTState      vpmt;
  73.112 +};
  73.113 +
  73.114 +static __inline__ s_time_t get_scheduled(
  73.115 +    struct vcpu *v, int irq,
  73.116 +    struct periodic_time *pt)
  73.117 +{
  73.118 +    if ( is_irq_enabled(v, irq) ) {
  73.119 +        return pt->scheduled;
  73.120 +    }
  73.121 +    else
  73.122 +        return -1;
  73.123 +}
  73.124 +
  73.125 +extern u64 hvm_get_guest_time(struct vcpu *v);
  73.126 +/*
  73.127 + * get processor time.
  73.128 + * unit: TSC
  73.129 + */
  73.130 +static __inline__ int64_t hvm_get_clock(struct vcpu *v)
  73.131 +{
  73.132 +    uint64_t  gtsc;
  73.133 +
  73.134 +    gtsc = hvm_get_guest_time(v);
  73.135 +    return gtsc;
  73.136 +}
  73.137 +
  73.138 +#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
  73.139 +
  73.140 +/* to hook the ioreq packet to get the PIT initialization info */
  73.141 +extern void hvm_hooks_assist(struct vcpu *v);
  73.142 +extern void pickup_deactive_ticks(struct periodic_time *vpit);
  73.143 +extern struct periodic_time *create_periodic_time(u32 period, char irq, char one_shot, time_cb *cb, void *data);
  73.144 +extern void destroy_periodic_time(struct periodic_time *pt);
  73.145 +void pit_init(struct vcpu *v, unsigned long cpu_khz);
  73.146 +void rtc_init(struct vcpu *v, int base, int irq);
  73.147 +void rtc_deinit(struct domain *d);
  73.148 +void pmtimer_init(struct vcpu *v, int base);
  73.149 +void pmtimer_deinit(struct domain *d);
  73.150 +int is_rtc_periodic_irq(void *opaque);
  73.151 +void pt_timer_fn(void *data);
  73.152 +void pit_time_fired(struct vcpu *v, void *priv);
  73.153 +
  73.154 +#endif /* __ASM_X86_HVM_VPT_H__ */
    74.1 --- a/xen/include/asm-x86/mach-generic/mach_apic.h	Tue Oct 24 11:21:48 2006 -0600
    74.2 +++ b/xen/include/asm-x86/mach-generic/mach_apic.h	Wed Oct 25 12:12:01 2006 -0600
    74.3 @@ -22,11 +22,7 @@ static inline void enable_apic_mode(void
    74.4  	return;
    74.5  }
    74.6  
    74.7 -/* No sane NUMA support right now. We should parse ACPI SRAT. */
    74.8 -static inline int apicid_to_node(int logical_apicid)
    74.9 -{
   74.10 -	return 0;
   74.11 -}
   74.12 +#define apicid_to_node(apicid) ((int)apicid_to_node[(u8)apicid])
   74.13  
   74.14  extern u8 bios_cpu_apicid[];
   74.15  static inline int cpu_present_to_apicid(int mps_cpu)
    75.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    75.2 +++ b/xen/include/asm-x86/numa.h	Wed Oct 25 12:12:01 2006 -0600
    75.3 @@ -0,0 +1,78 @@
    75.4 +#ifndef _ASM_X8664_NUMA_H 
    75.5 +#define _ASM_X8664_NUMA_H 1
    75.6 +
    75.7 +#include <xen/cpumask.h>
    75.8 +
    75.9 +#define NODES_SHIFT 6
   75.10 +
   75.11 +extern unsigned char cpu_to_node[];
   75.12 +extern cpumask_t     node_to_cpumask[];
   75.13 +
   75.14 +#define cpu_to_node(cpu)		(cpu_to_node[cpu])
   75.15 +#define parent_node(node)		(node)
   75.16 +#define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
   75.17 +#define node_to_cpumask(node)    (node_to_cpumask[node])
   75.18 +
   75.19 +struct node { 
   75.20 +	u64 start,end; 
   75.21 +};
   75.22 +
   75.23 +extern int compute_hash_shift(struct node *nodes, int numnodes);
   75.24 +extern int pxm_to_node(int nid);
   75.25 +
   75.26 +#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
   75.27 +#define VIRTUAL_BUG_ON(x) 
   75.28 +#define NODEMAPSIZE 0xfff
   75.29 +
   75.30 +extern void numa_add_cpu(int cpu);
   75.31 +extern void numa_init_array(void);
   75.32 +extern int numa_off;
   75.33 +
   75.34 +extern void numa_set_node(int cpu, int node);
   75.35 +
   75.36 +extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
   75.37 +extern unsigned char apicid_to_node[256];
   75.38 +#ifdef CONFIG_NUMA
   75.39 +extern void __init init_cpu_to_node(void);
   75.40 +
   75.41 +static inline void clear_node_cpumask(int cpu)
   75.42 +{
   75.43 +	clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
   75.44 +}
   75.45 +
   75.46 +/* Simple perfect hash to map physical addresses to node numbers */
   75.47 +extern int memnode_shift; 
   75.48 +extern u8  memnodemap[NODEMAPSIZE]; 
   75.49 +
   75.50 +struct node_data {
   75.51 +    unsigned long node_start_pfn;
   75.52 +    unsigned long node_spanned_pages;
   75.53 +    unsigned int  node_id;
   75.54 +};
   75.55 +
   75.56 +extern struct node_data node_data[];
   75.57 +
   75.58 +static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 
   75.59 +{ 
   75.60 +	unsigned nid; 
   75.61 +	VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
   75.62 +	nid = memnodemap[addr >> memnode_shift]; 
   75.63 +	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
   75.64 +	return nid; 
   75.65 +} 
   75.66 +
   75.67 +#define NODE_DATA(nid)		(&(node_data[nid]))
   75.68 +
   75.69 +#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
   75.70 +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
   75.71 +				 NODE_DATA(nid)->node_spanned_pages)
   75.72 +
   75.73 +
   75.74 +#else
   75.75 +#define init_cpu_to_node() do {} while (0)
   75.76 +#define clear_node_cpumask(cpu) do {} while (0)
   75.77 +#endif
   75.78 +
   75.79 +#define NUMA_NO_NODE 0xff
   75.80 +
   75.81 +#endif
    76.1 --- a/xen/include/public/arch-ia64.h	Tue Oct 24 11:21:48 2006 -0600
    76.2 +++ b/xen/include/public/arch-ia64.h	Wed Oct 25 12:12:01 2006 -0600
    76.3 @@ -68,6 +68,9 @@ typedef unsigned long xen_ulong_t;
    76.4  #define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE)
    76.5  #define STORE_PAGE_SIZE	 PAGE_SIZE
    76.6  
    76.7 +#define BUFFER_IO_PAGE_START (STORE_PAGE_START+PAGE_SIZE)
    76.8 +#define BUFFER_IO_PAGE_SIZE PAGE_SIZE
    76.9 +
   76.10  #define IO_SAPIC_START   0xfec00000UL
   76.11  #define IO_SAPIC_SIZE    0x100000
   76.12  
    77.1 --- a/xen/include/public/hvm/ioreq.h	Tue Oct 24 11:21:48 2006 -0600
    77.2 +++ b/xen/include/public/hvm/ioreq.h	Wed Oct 25 12:12:01 2006 -0600
    77.3 @@ -86,6 +86,10 @@ struct buffered_iopage {
    77.4  };            /* sizeof this structure must be in one page */
    77.5  typedef struct buffered_iopage buffered_iopage_t;
    77.6  
    77.7 +#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
    77.8 +#define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
    77.9 +#define ACPI_PM_TMR_BLK_ADDRESS             (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
   77.10 +
   77.11  #endif /* _IOREQ_H_ */
   77.12  
   77.13  /*
    78.1 --- a/xen/include/xen/config.h	Tue Oct 24 11:21:48 2006 -0600
    78.2 +++ b/xen/include/xen/config.h	Wed Oct 25 12:12:01 2006 -0600
    78.3 @@ -50,5 +50,7 @@
    78.4  #endif /* !__ASSEMBLY__ */
    78.5  
    78.6  #define fastcall
    78.7 +#define __cpuinitdata
    78.8 +#define __cpuinit
    78.9  
   78.10  #endif /* __XEN_CONFIG_H__ */
    79.1 --- a/xen/include/xen/mm.h	Tue Oct 24 11:21:48 2006 -0600
    79.2 +++ b/xen/include/xen/mm.h	Wed Oct 25 12:12:01 2006 -0600
    79.3 @@ -45,7 +45,8 @@ void end_boot_allocator(void);
    79.4  /* Generic allocator. These functions are *not* interrupt-safe. */
    79.5  void init_heap_pages(
    79.6      unsigned int zone, struct page_info *pg, unsigned long nr_pages);
    79.7 -struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order);
    79.8 +struct page_info *alloc_heap_pages(
    79.9 +    unsigned int zone, unsigned int cpu, unsigned int order);
   79.10  void free_heap_pages(
   79.11      unsigned int zone, struct page_info *pg, unsigned int order);
   79.12  void scrub_heap_pages(void);
   79.13 @@ -61,8 +62,12 @@ void free_xenheap_pages(void *v, unsigne
   79.14  void init_domheap_pages(paddr_t ps, paddr_t pe);
   79.15  struct page_info *alloc_domheap_pages(
   79.16      struct domain *d, unsigned int order, unsigned int memflags);
   79.17 +struct page_info *__alloc_domheap_pages(
   79.18 +    struct domain *d, unsigned int cpu, unsigned int order, 
   79.19 +    unsigned int memflags);
   79.20  void free_domheap_pages(struct page_info *pg, unsigned int order);
   79.21  unsigned long avail_domheap_pages(void);
   79.22 +unsigned long avail_heap_pages(int zone, int node);
   79.23  #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
   79.24  #define free_domheap_page(p)  (free_domheap_pages(p,0))
   79.25  
    80.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    80.2 +++ b/xen/include/xen/nodemask.h	Wed Oct 25 12:12:01 2006 -0600
    80.3 @@ -0,0 +1,338 @@
    80.4 +#ifndef __LINUX_NODEMASK_H
    80.5 +#define __LINUX_NODEMASK_H
    80.6 +
    80.7 +/*
    80.8 + * Nodemasks provide a bitmap suitable for representing the
    80.9 + * set of Node's in a system, one bit position per Node number.
   80.10 + *
   80.11 + * See detailed comments in the file linux/bitmap.h describing the
   80.12 + * data type on which these nodemasks are based.
   80.13 + *
   80.14 + * For details of nodemask_scnprintf() and nodemask_parse(),
   80.15 + * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
   80.16 + *
   80.17 + * The available nodemask operations are:
   80.18 + *
   80.19 + * void node_set(node, mask)		turn on bit 'node' in mask
   80.20 + * void node_clear(node, mask)		turn off bit 'node' in mask
   80.21 + * void nodes_setall(mask)		set all bits
   80.22 + * void nodes_clear(mask)		clear all bits
   80.23 + * int node_isset(node, mask)		true iff bit 'node' set in mask
   80.24 + * int node_test_and_set(node, mask)	test and set bit 'node' in mask
   80.25 + *
   80.26 + * void nodes_and(dst, src1, src2)	dst = src1 & src2  [intersection]
   80.27 + * void nodes_or(dst, src1, src2)	dst = src1 | src2  [union]
   80.28 + * void nodes_xor(dst, src1, src2)	dst = src1 ^ src2
   80.29 + * void nodes_andnot(dst, src1, src2)	dst = src1 & ~src2
   80.30 + * void nodes_complement(dst, src)	dst = ~src
   80.31 + *
   80.32 + * int nodes_equal(mask1, mask2)	Does mask1 == mask2?
   80.33 + * int nodes_intersects(mask1, mask2)	Do mask1 and mask2 intersect?
   80.34 + * int nodes_subset(mask1, mask2)	Is mask1 a subset of mask2?
   80.35 + * int nodes_empty(mask)		Is mask empty (no bits sets)?
   80.36 + * int nodes_full(mask)			Is mask full (all bits sets)?
   80.37 + * int nodes_weight(mask)		Hamming weight - number of set bits
   80.38 + *
   80.39 + * void nodes_shift_right(dst, src, n)	Shift right
   80.40 + * void nodes_shift_left(dst, src, n)	Shift left
   80.41 + *
   80.42 + * int first_node(mask)			Number lowest set bit, or MAX_NUMNODES
   80.43 + * int next_node(node, mask)		Next node past 'node', or MAX_NUMNODES
   80.44 + * int first_unset_node(mask)		First node not set in mask, or 
   80.45 + *					MAX_NUMNODES.
   80.46 + *
   80.47 + * nodemask_t nodemask_of_node(node)	Return nodemask with bit 'node' set
   80.48 + * NODE_MASK_ALL			Initializer - all bits set
   80.49 + * NODE_MASK_NONE			Initializer - no bits set
   80.50 + * unsigned long *nodes_addr(mask)	Array of unsigned long's in mask
   80.51 + *
   80.52 + * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
   80.53 + * int nodemask_parse(ubuf, ulen, mask)	Parse ascii string as nodemask
   80.54 + *
   80.55 + * for_each_node_mask(node, mask)	for-loop node over mask
   80.56 + *
   80.57 + * int num_online_nodes()		Number of online Nodes
   80.58 + * int num_possible_nodes()		Number of all possible Nodes
   80.59 + *
   80.60 + * int node_online(node)		Is some node online?
   80.61 + * int node_possible(node)		Is some node possible?
   80.62 + *
   80.63 + * int any_online_node(mask)		First online node in mask
   80.64 + *
   80.65 + * node_set_online(node)		set bit 'node' in node_online_map
   80.66 + * node_set_offline(node)		clear bit 'node' in node_online_map
   80.67 + *
   80.68 + * for_each_node(node)			for-loop node over node_possible_map
   80.69 + * for_each_online_node(node)		for-loop node over node_online_map
   80.70 + *
   80.71 + * Subtlety:
   80.72 + * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway)
   80.73 + *    to generate slightly worse code.  So use a simple one-line #define
   80.74 + *    for node_isset(), instead of wrapping an inline inside a macro, the
   80.75 + *    way we do the other calls.
   80.76 + */
   80.77 +
   80.78 +#include <xen/kernel.h>
   80.79 +#include <xen/bitmap.h>
   80.80 +#include <xen/numa.h>
   80.81 +
   80.82 +typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
   80.83 +extern nodemask_t _unused_nodemask_arg_;
   80.84 +
   80.85 +#define node_set(node, dst) __node_set((node), &(dst))
   80.86 +static inline void __node_set(int node, volatile nodemask_t *dstp)
   80.87 +{
   80.88 +	set_bit(node, dstp->bits);
   80.89 +}
   80.90 +
   80.91 +#define node_clear(node, dst) __node_clear((node), &(dst))
   80.92 +static inline void __node_clear(int node, volatile nodemask_t *dstp)
   80.93 +{
   80.94 +	clear_bit(node, dstp->bits);
   80.95 +}
   80.96 +
   80.97 +#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
   80.98 +static inline void __nodes_setall(nodemask_t *dstp, int nbits)
   80.99 +{
  80.100 +	bitmap_fill(dstp->bits, nbits);
  80.101 +}
  80.102 +
  80.103 +#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
  80.104 +static inline void __nodes_clear(nodemask_t *dstp, int nbits)
  80.105 +{
  80.106 +	bitmap_zero(dstp->bits, nbits);
  80.107 +}
  80.108 +
  80.109 +/* No static inline type checking - see Subtlety (1) above. */
  80.110 +#define node_isset(node, nodemask) test_bit((node), (nodemask).bits)
  80.111 +
  80.112 +#define node_test_and_set(node, nodemask) \
  80.113 +			__node_test_and_set((node), &(nodemask))
  80.114 +static inline int __node_test_and_set(int node, nodemask_t *addr)
  80.115 +{
  80.116 +	return test_and_set_bit(node, addr->bits);
  80.117 +}
  80.118 +
  80.119 +#define nodes_and(dst, src1, src2) \
  80.120 +			__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
  80.121 +static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
  80.122 +					const nodemask_t *src2p, int nbits)
  80.123 +{
  80.124 +	bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
  80.125 +}
  80.126 +
  80.127 +#define nodes_or(dst, src1, src2) \
  80.128 +			__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
  80.129 +static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
  80.130 +					const nodemask_t *src2p, int nbits)
  80.131 +{
  80.132 +	bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
  80.133 +}
  80.134 +
  80.135 +#define nodes_xor(dst, src1, src2) \
  80.136 +			__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
  80.137 +static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
  80.138 +					const nodemask_t *src2p, int nbits)
  80.139 +{
  80.140 +	bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
  80.141 +}
  80.142 +
  80.143 +#define nodes_andnot(dst, src1, src2) \
  80.144 +			__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
  80.145 +static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
  80.146 +					const nodemask_t *src2p, int nbits)
  80.147 +{
  80.148 +	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
  80.149 +}
  80.150 +
  80.151 +#define nodes_complement(dst, src) \
  80.152 +			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
  80.153 +static inline void __nodes_complement(nodemask_t *dstp,
  80.154 +					const nodemask_t *srcp, int nbits)
  80.155 +{
  80.156 +	bitmap_complement(dstp->bits, srcp->bits, nbits);
  80.157 +}
  80.158 +
  80.159 +#define nodes_equal(src1, src2) \
  80.160 +			__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
  80.161 +static inline int __nodes_equal(const nodemask_t *src1p,
  80.162 +					const nodemask_t *src2p, int nbits)
  80.163 +{
  80.164 +	return bitmap_equal(src1p->bits, src2p->bits, nbits);
  80.165 +}
  80.166 +
  80.167 +#define nodes_intersects(src1, src2) \
  80.168 +			__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
  80.169 +static inline int __nodes_intersects(const nodemask_t *src1p,
  80.170 +					const nodemask_t *src2p, int nbits)
  80.171 +{
  80.172 +	return bitmap_intersects(src1p->bits, src2p->bits, nbits);
  80.173 +}
  80.174 +
  80.175 +#define nodes_subset(src1, src2) \
  80.176 +			__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
  80.177 +static inline int __nodes_subset(const nodemask_t *src1p,
  80.178 +					const nodemask_t *src2p, int nbits)
  80.179 +{
  80.180 +	return bitmap_subset(src1p->bits, src2p->bits, nbits);
  80.181 +}
  80.182 +
  80.183 +#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
  80.184 +static inline int __nodes_empty(const nodemask_t *srcp, int nbits)
  80.185 +{
  80.186 +	return bitmap_empty(srcp->bits, nbits);
  80.187 +}
  80.188 +
  80.189 +#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
  80.190 +static inline int __nodes_full(const nodemask_t *srcp, int nbits)
  80.191 +{
  80.192 +	return bitmap_full(srcp->bits, nbits);
  80.193 +}
  80.194 +
  80.195 +#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
  80.196 +static inline int __nodes_weight(const nodemask_t *srcp, int nbits)
  80.197 +{
  80.198 +	return bitmap_weight(srcp->bits, nbits);
  80.199 +}
  80.200 +
  80.201 +#define nodes_shift_right(dst, src, n) \
  80.202 +			__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
  80.203 +static inline void __nodes_shift_right(nodemask_t *dstp,
  80.204 +					const nodemask_t *srcp, int n, int nbits)
  80.205 +{
  80.206 +	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
  80.207 +}
  80.208 +
  80.209 +#define nodes_shift_left(dst, src, n) \
  80.210 +			__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
  80.211 +static inline void __nodes_shift_left(nodemask_t *dstp,
  80.212 +					const nodemask_t *srcp, int n, int nbits)
  80.213 +{
  80.214 +	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
  80.215 +}
  80.216 +
  80.217 +/* FIXME: better would be to fix all architectures to never return
  80.218 +          > MAX_NUMNODES, then the silly min_ts could be dropped. */
  80.219 +
  80.220 +#define first_node(src) __first_node(&(src))
  80.221 +static inline int __first_node(const nodemask_t *srcp)
  80.222 +{
  80.223 +	return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
  80.224 +}
  80.225 +
  80.226 +#define next_node(n, src) __next_node((n), &(src))
  80.227 +static inline int __next_node(int n, const nodemask_t *srcp)
  80.228 +{
  80.229 +	return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
  80.230 +}
  80.231 +
  80.232 +#define nodemask_of_node(node)						\
  80.233 +({									\
  80.234 +	typeof(_unused_nodemask_arg_) m;				\
  80.235 +	if (sizeof(m) == sizeof(unsigned long)) {			\
  80.236 +		m.bits[0] = 1UL<<(node);				\
  80.237 +	} else {							\
  80.238 +		nodes_clear(m);						\
  80.239 +		node_set((node), m);					\
  80.240 +	}								\
  80.241 +	m;								\
  80.242 +})
  80.243 +
  80.244 +#define first_unset_node(mask) __first_unset_node(&(mask))
  80.245 +static inline int __first_unset_node(const nodemask_t *maskp)
  80.246 +{
  80.247 +	return min_t(int,MAX_NUMNODES,
  80.248 +			find_first_zero_bit(maskp->bits, MAX_NUMNODES));
  80.249 +}
  80.250 +
  80.251 +#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
  80.252 +
  80.253 +#if MAX_NUMNODES <= BITS_PER_LONG
  80.254 +
  80.255 +#define NODE_MASK_ALL							\
  80.256 +((nodemask_t) { {							\
  80.257 +	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
  80.258 +} })
  80.259 +
  80.260 +#else
  80.261 +
  80.262 +#define NODE_MASK_ALL							\
  80.263 +((nodemask_t) { {							\
  80.264 +	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL,			\
  80.265 +	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
  80.266 +} })
  80.267 +
  80.268 +#endif
  80.269 +
  80.270 +#define NODE_MASK_NONE							\
  80.271 +((nodemask_t) { {							\
  80.272 +	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] =  0UL			\
  80.273 +} })
  80.274 +
  80.275 +#define nodes_addr(src) ((src).bits)
  80.276 +
  80.277 +#if 0
  80.278 +#define nodemask_scnprintf(buf, len, src) \
  80.279 +			__nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
  80.280 +static inline int __nodemask_scnprintf(char *buf, int len,
  80.281 +					const nodemask_t *srcp, int nbits)
  80.282 +{
  80.283 +	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
  80.284 +}
  80.285 +
  80.286 +#define nodemask_parse(ubuf, ulen, dst) \
  80.287 +			__nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES)
  80.288 +static inline int __nodemask_parse(const char __user *buf, int len,
  80.289 +					nodemask_t *dstp, int nbits)
  80.290 +{
  80.291 +	return bitmap_parse(buf, len, dstp->bits, nbits);
  80.292 +}
  80.293 +#endif
  80.294 +
  80.295 +#if MAX_NUMNODES > 1
  80.296 +#define for_each_node_mask(node, mask)			\
  80.297 +	for ((node) = first_node(mask);			\
  80.298 +		(node) < MAX_NUMNODES;			\
  80.299 +		(node) = next_node((node), (mask)))
  80.300 +#else /* MAX_NUMNODES == 1 */
  80.301 +#define for_each_node_mask(node, mask)			\
  80.302 +	if (!nodes_empty(mask))				\
  80.303 +		for ((node) = 0; (node) < 1; (node)++)
  80.304 +#endif /* MAX_NUMNODES */
  80.305 +
  80.306 +/*
  80.307 + * The following particular system nodemasks and operations
  80.308 + * on them manage all possible and online nodes.
  80.309 + */
  80.310 +
  80.311 +extern nodemask_t node_online_map;
  80.312 +extern nodemask_t node_possible_map;
  80.313 +
  80.314 +#if MAX_NUMNODES > 1
  80.315 +#define num_online_nodes()	nodes_weight(node_online_map)
  80.316 +#define num_possible_nodes()	nodes_weight(node_possible_map)
  80.317 +#define node_online(node)	node_isset((node), node_online_map)
  80.318 +#define node_possible(node)	node_isset((node), node_possible_map)
  80.319 +#else
  80.320 +#define num_online_nodes()	1
  80.321 +#define num_possible_nodes()	1
  80.322 +#define node_online(node)	((node) == 0)
  80.323 +#define node_possible(node)	((node) == 0)
  80.324 +#endif
  80.325 +
  80.326 +#define any_online_node(mask)			\
  80.327 +({						\
  80.328 +	int node;				\
  80.329 +	for_each_node_mask(node, (mask))	\
  80.330 +		if (node_online(node))		\
  80.331 +			break;			\
  80.332 +	node;					\
  80.333 +})
  80.334 +
  80.335 +#define node_set_online(node)	   set_bit((node), node_online_map.bits)
  80.336 +#define node_set_offline(node)	   clear_bit((node), node_online_map.bits)
  80.337 +
  80.338 +#define for_each_node(node)	   for_each_node_mask((node), node_possible_map)
  80.339 +#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
  80.340 +
  80.341 +#endif /* __LINUX_NODEMASK_H */
    81.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    81.2 +++ b/xen/include/xen/numa.h	Wed Oct 25 12:12:01 2006 -0600
    81.3 @@ -0,0 +1,13 @@
    81.4 +#ifndef _XEN_NUMA_H
    81.5 +#define _XEN_NUMA_H
    81.6 +
    81.7 +#include <xen/config.h>
    81.8 +#include <asm/numa.h>
    81.9 +
   81.10 +#ifndef NODES_SHIFT
   81.11 +#define NODES_SHIFT     0
   81.12 +#endif
   81.13 +
   81.14 +#define MAX_NUMNODES    (1 << NODES_SHIFT)
   81.15 +
   81.16 +#endif /* _XEN_NUMA_H */