ia64/xen-unstable

changeset 10194:c073ebdbde8c

merge with xen-unstable.hg

author:   awilliam@xenbuild.aw
date:     Fri May 26 13:41:49 2006 -0600 (2006-05-26)
parents:  9d52a66c7499 954f4dea9da6
children: 18b087bafac6
files:    linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
          linux-2.6-xen-sparse/include/asm-x86_64/e820.h
          linux-2.6-xen-sparse/include/xen/net_driver_util.h
          tools/xenstore/xenstored_proc.h
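
A merge of mainline xen-unstable into the ia64 tree. The changes group into a few recurring themes: spinlocks converted to static DEFINE_SPINLOCK() initialisation, the XenbusState typedef replaced by the plain enum xenbus_state tag throughout the split drivers, VCPU hotplug support factored out of smpboot.c into the new drivers/xen/core/cpu_hotplug.c with a matching include/xen/cpu_hotplug.h, the net_driver_util helper module removed in favour of per-driver MAC parsing, netfront moved onto the standard net-device carrier flag, the Xen copy of the x86_64 e820.h relocated under mach-xen/asm, and a unified IOCTL_PRIVCMD_MMAPBATCH path in privcmd.
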
line diff
     1.1 --- a/.hgignore	Thu May 25 15:59:18 2006 -0600
     1.2 +++ b/.hgignore	Fri May 26 13:41:49 2006 -0600
     1.3 @@ -14,7 +14,7 @@
     1.4  .*\.orig$
     1.5  .*\.rej$
     1.6  .*/a\.out$
     1.7 -.*/cscope\.*$
     1.8 +.*/cscope\..*$
     1.9  ^[^/]*\.bz2$
    1.10  ^TAGS$
    1.11  ^dist/.*$
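
The old pattern escaped the dot and then starred it: cscope\.*$ matches "cscope" followed by zero or more literal dots at the end of a name, so it ignored a bare "cscope" but not the files cscope actually generates. The corrected cscope\..*$ requires "cscope." followed by anything up to the end of the filename, which covers cscope.out, cscope.files and friends.
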
     2.1 --- a/extras/mini-os/Makefile	Thu May 25 15:59:18 2006 -0600
     2.2 +++ b/extras/mini-os/Makefile	Fri May 26 13:41:49 2006 -0600
     2.3 @@ -13,6 +13,7 @@ CFLAGS += -Wstrict-prototypes -Wnested-e
     2.4  override CPPFLAGS := -Iinclude $(CPPFLAGS)
     2.5  ASFLAGS = -D__ASSEMBLY__
     2.6  
     2.7 +LDLIBS =  -L. -lminios
     2.8  LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
     2.9  
    2.10  ifeq ($(TARGET_ARCH),x86_32)
    2.11 @@ -55,11 +56,11 @@ default: $(TARGET)
    2.12  links:
    2.13  	[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
    2.14  
    2.15 -libminios.a: $(OBJS) $(HEAD)
    2.16 -	ar r libminios.a $(HEAD) $(OBJS)
    2.17 +libminios.a: links $(OBJS) $(HEAD)
    2.18 +	$(AR) r libminios.a $(HEAD) $(OBJS)
    2.19  
    2.20 -$(TARGET): links libminios.a $(HEAD)
    2.21 -	$(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
    2.22 +$(TARGET): libminios.a $(HEAD)
    2.23 +	$(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
    2.24  	gzip -f -9 -c $@.elf >$@.gz
    2.25  
    2.26  .PHONY: clean
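
Three small build fixes here. The libminios.a rule gains the links prerequisite, so the include/xen symlink is in place even when the archive is built directly; the archiver is spelled $(AR) so cross builds pick up the right binutils; and -L. -lminios moves into LDLIBS, keeping the library after the object files on the final link line. That ordering matters with static archives: the linker pulls in only those archive members that resolve symbols still undefined at the point the archive is named, so naming it before the objects would extract nothing from it.
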
     3.1 --- a/extras/mini-os/lib/printf.c	Thu May 25 15:59:18 2006 -0600
     3.2 +++ b/extras/mini-os/lib/printf.c	Fri May 26 13:41:49 2006 -0600
     3.3 @@ -54,6 +54,8 @@
     3.4   * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
     3.5   */
     3.6  
     3.7 +#if !defined HAVE_LIBC
     3.8 +
     3.9  #include <os.h>
    3.10  #include <types.h>
    3.11  #include <hypervisor.h>
    3.12 @@ -789,4 +791,4 @@ int sscanf(const char * buf, const char 
    3.13  	return i;
    3.14  }
    3.15  
    3.16 -
    3.17 +#endif
     4.1 --- a/extras/mini-os/lib/string.c	Thu May 25 15:59:18 2006 -0600
     4.2 +++ b/extras/mini-os/lib/string.c	Fri May 26 13:41:49 2006 -0600
     4.3 @@ -18,6 +18,8 @@
     4.4   ****************************************************************************
     4.5   */
     4.6  
     4.7 +#if !defined HAVE_LIBC
     4.8 +
     4.9  #include <os.h>
    4.10  #include <types.h>
    4.11  #include <lib.h>
    4.12 @@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
    4.13          }
    4.14          return NULL;
    4.15  }
    4.16 +
    4.17 +#endif
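
lib/printf.c and lib/string.c are bracketed by the same new guard: their freestanding fallback implementations are compiled only when mini-os is built without a real C library. A minimal sketch of the pattern, with a hypothetical fallback function:

    #include <stddef.h>

    #if !defined HAVE_LIBC

    /* Freestanding fallback, compiled only when no libc is
     * available; with HAVE_LIBC defined this entire file
     * compiles to nothing and the libc version is used. */
    size_t strlen(const char *s)
    {
            const char *p = s;

            while (*p)
                    p++;
            return p - s;
    }

    #endif /* !HAVE_LIBC */
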
     5.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile	Thu May 25 15:59:18 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile	Fri May 26 13:41:49 2006 -0600
     5.3 @@ -2,7 +2,6 @@
     5.4  ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
     5.5  obj-y   += util.o
     5.6  endif
     5.7 -obj-$(CONFIG_XEN_IA64_DOM0_VP)	+= net_driver_util.o
     5.8  
     5.9  obj-y	+= core/
    5.10  #obj-y	+= char/
     6.1 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Thu May 25 15:59:18 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Fri May 26 13:41:49 2006 -0600
     6.3 @@ -329,7 +329,7 @@ out:
     6.4   * Callback received when the backend's state changes.
     6.5   */
     6.6  static void backend_changed(struct xenbus_device *dev,
     6.7 -			    XenbusState backend_state)
     6.8 +			    enum xenbus_state backend_state)
     6.9  {
    6.10  	struct tpm_private *tp = dev->data;
    6.11  	DPRINTK("\n");
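
This is the first of many identical signature fixes in this merge: the XenbusState typedef is dropped in favour of the underlying enum xenbus_state tag, matching the kernel's general preference for explicit struct/enum tags over typedefs. The same substitution recurs below in blkback, blkfront, netback, netfront, pciback, pcifront, tpmback and the xenbus core.
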
     7.1 --- a/linux-2.6-xen-sparse/drivers/xen/Makefile	Thu May 25 15:59:18 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile	Fri May 26 13:41:49 2006 -0600
     7.3 @@ -1,5 +1,4 @@
     7.4  
     7.5 -obj-y	+= net_driver_util.o
     7.6  obj-y	+= util.o
     7.7  
     7.8  obj-y	+= core/
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu May 25 15:59:18 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri May 26 13:41:49 2006 -0600
     8.3 @@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
     8.4   * Also protects non-atomic updates of current_pages and driver_pages, and
     8.5   * balloon lists.
     8.6   */
     8.7 -spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
     8.8 +DEFINE_SPINLOCK(balloon_lock);
     8.9  
    8.10  /* We aim for 'current allocation' == 'target allocation'. */
    8.11  static unsigned long current_pages;
    8.12 @@ -360,6 +360,12 @@ static void balloon_process(void *unused
    8.13  /* Resets the Xen limit, sets new target, and kicks off processing. */
    8.14  static void set_new_target(unsigned long target)
    8.15  {
    8.16 +	unsigned long min_target;
    8.17 +
    8.18 +	/* Do not allow target to reduce below 2% of maximum memory size. */
    8.19 +	min_target = max_pfn / 50;
    8.20 +	target = max(target, min_target);
    8.21 +
    8.22  	/* No need for lock. Not read-modify-write updates. */
    8.23  	hard_limit   = ~0UL;
    8.24  	target_pages = target;
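
Two independent fixes to the balloon driver. First, balloon_lock becomes a DEFINE_SPINLOCK(), which declares and statically initialises the lock in one step; assignment from SPIN_LOCK_UNLOCKED was being phased out of the kernel around this time, not least because a shared static initialiser cannot carry per-lock debugging state. The same conversion is applied to blkback, blkfront, blktap, console, evtchn, gnttab and netback below. Second, set_new_target() now clamps the target to max_pfn / 50, i.e. 2% of maximum memory, so a bad request cannot balloon a domain down to nothing; for a 4 GB guest (max_pfn = 1048576 with 4 KB pages) the floor is 20971 pages, roughly 82 MB. The lock idiom in isolation, with a hypothetical lock name (assumes <linux/spinlock.h>):

    /* Declares and statically initialises the lock; no runtime
     * spin_lock_init() call is needed. */
    static DEFINE_SPINLOCK(example_lock);

    static void example_critical_section(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&example_lock, flags);
            /* ... touch the data example_lock protects ... */
            spin_unlock_irqrestore(&example_lock, flags);
    }
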
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Thu May 25 15:59:18 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri May 26 13:41:49 2006 -0600
     9.3 @@ -82,7 +82,7 @@ typedef struct {
     9.4  
     9.5  static pending_req_t *pending_reqs;
     9.6  static struct list_head pending_free;
     9.7 -static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
     9.8 +static DEFINE_SPINLOCK(pending_free_lock);
     9.9  static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
    9.10  
    9.11  #define BLKBACK_INVALID_HANDLE (~0)
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Thu May 25 15:59:18 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri May 26 13:41:49 2006 -0600
    10.3 @@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
    10.4   * Callback received when the frontend's state changes.
    10.5   */
    10.6  static void frontend_changed(struct xenbus_device *dev,
    10.7 -			     XenbusState frontend_state)
    10.8 +			     enum xenbus_state frontend_state)
    10.9  {
   10.10  	struct backend_info *be = dev->data;
   10.11  	int err;
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu May 25 15:59:18 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri May 26 13:41:49 2006 -0600
    11.3 @@ -247,7 +247,7 @@ fail:
    11.4   * Callback received when the backend's state changes.
    11.5   */
    11.6  static void backend_changed(struct xenbus_device *dev,
    11.7 -			    XenbusState backend_state)
    11.8 +			    enum xenbus_state backend_state)
    11.9  {
   11.10  	struct blkfront_info *info = dev->data;
   11.11  	struct block_device *bd;
   11.12 @@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
   11.13  		   have ignored this request initially, as the device was
   11.14  		   still mounted. */
   11.15  		struct xenbus_device * dev = info->xbdev;
   11.16 -		XenbusState state = xenbus_read_driver_state(dev->otherend);
   11.17 +		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
   11.18  
   11.19  		if (state == XenbusStateClosing)
   11.20  			blkfront_closing(dev);
    12.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Thu May 25 15:59:18 2006 -0600
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c	Fri May 26 13:41:49 2006 -0600
    12.3 @@ -93,7 +93,7 @@ static struct block_device_operations xl
    12.4  	.ioctl  = blkif_ioctl,
    12.5  };
    12.6  
    12.7 -spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
    12.8 +DEFINE_SPINLOCK(blkif_io_lock);
    12.9  
   12.10  static struct xlbd_major_info *
   12.11  xlbd_alloc_major_info(int major, int minor, int index)
    13.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu May 25 15:59:18 2006 -0600
    13.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri May 26 13:41:49 2006 -0600
    13.3 @@ -138,7 +138,7 @@ typedef struct {
    13.4   */
    13.5  static pending_req_t pending_reqs[MAX_PENDING_REQS];
    13.6  static unsigned char pending_ring[MAX_PENDING_REQS];
    13.7 -static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
    13.8 +static DEFINE_SPINLOCK(pend_prod_lock);
    13.9  /* NB. We use a different index type to differentiate from shared blk rings. */
   13.10  typedef unsigned int PEND_RING_IDX;
   13.11  #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
    14.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Thu May 25 15:59:18 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Fri May 26 13:41:49 2006 -0600
    14.3 @@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
    14.4  {
    14.5  	unsigned int goal;
    14.6  	goal = simple_strtoul(str, NULL, 0);
    14.7 -	while (wbuf_size < goal)
    14.8 -		wbuf_size <<= 1;
    14.9 +	if (goal) {
   14.10 +		goal = roundup_pow_of_two(goal);
   14.11 +		if (wbuf_size < goal)
   14.12 +			wbuf_size = goal;
   14.13 +	}
   14.14  	return 1;
   14.15  }
   14.16  __setup("xencons_bufsz=", xencons_bufsz_setup);
   14.17  
   14.18  /* This lock protects accesses to the common transmit buffer. */
   14.19 -static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
   14.20 +static DEFINE_SPINLOCK(xencons_lock);
   14.21  
   14.22  /* Common transmit-kick routine. */
   14.23  static void __xencons_tx_flush(void);
   14.24 @@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
   14.25  
   14.26  /******************** Kernel console driver ********************************/
   14.27  
   14.28 -static void kcons_write(
   14.29 -	struct console *c, const char *s, unsigned int count)
   14.30 +static void kcons_write(struct console *c, const char *s, unsigned int count)
   14.31  {
   14.32  	int           i = 0;
   14.33  	unsigned long flags;
   14.34 @@ -155,14 +157,14 @@ static void kcons_write(
   14.35  	spin_unlock_irqrestore(&xencons_lock, flags);
   14.36  }
   14.37  
   14.38 -static void kcons_write_dom0(
   14.39 -	struct console *c, const char *s, unsigned int count)
   14.40 +static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
   14.41  {
   14.42 -	int rc;
   14.43  
   14.44 -	while ((count > 0) &&
   14.45 -	       ((rc = HYPERVISOR_console_io(
   14.46 -			CONSOLEIO_write, count, (char *)s)) > 0)) {
   14.47 +	while (count > 0) {
   14.48 +		int rc;
   14.49 +		rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
   14.50 +		if (rc <= 0)
   14.51 +			break;
   14.52  		count -= rc;
   14.53  		s += rc;
   14.54  	}
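
Two robustness fixes in the console driver. The old xencons_bufsz_setup() doubled wbuf_size until it reached the goal, and with a large enough goal the shift wraps to zero and the loop never terminates; the replacement rounds the goal itself up with roundup_pow_of_two() and only ever grows wbuf_size. The if (goal) guard matters because roundup_pow_of_two() is undefined for an input of zero. kcons_write_dom0() is also rewritten from a condition-with-side-effects loop into a plain loop with an early break. A user-space model of the rounding, assuming 32-bit unsigned values:

    /* Round x up to the next power of two by smearing the top
     * set bit downwards; this mirrors what roundup_pow_of_two()
     * does for nonzero inputs (x == 0 yields 0 here, which is
     * why the caller above guards against it). */
    static unsigned int round_up_pow2(unsigned int x)
    {
            x--;
            x |= x >> 1;
            x |= x >> 2;
            x |= x >> 4;
            x |= x >> 8;
            x |= x >> 16;
            return x + 1;
    }
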
    15.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Thu May 25 15:59:18 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile	Fri May 26 13:41:49 2006 -0600
    15.3 @@ -4,8 +4,9 @@
    15.4  
    15.5  obj-y   := evtchn.o reboot.o gnttab.o features.o
    15.6  
    15.7 -obj-$(CONFIG_PROC_FS) += xen_proc.o
    15.8 -obj-$(CONFIG_NET)     += skbuff.o
    15.9 -obj-$(CONFIG_SMP)     += smpboot.o
   15.10 -obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
   15.11 -obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
   15.12 +obj-$(CONFIG_PROC_FS)     += xen_proc.o
   15.13 +obj-$(CONFIG_NET)         += skbuff.o
   15.14 +obj-$(CONFIG_SMP)         += smpboot.o
   15.15 +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
   15.16 +obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
   15.17 +obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c	Fri May 26 13:41:49 2006 -0600
    16.3 @@ -0,0 +1,185 @@
    16.4 +#include <linux/config.h>
    16.5 +#include <linux/init.h>
    16.6 +#include <linux/kernel.h>
    16.7 +#include <linux/sched.h>
    16.8 +#include <linux/notifier.h>
    16.9 +#include <linux/cpu.h>
   16.10 +#include <xen/cpu_hotplug.h>
   16.11 +#include <xen/xenbus.h>
   16.12 +
   16.13 +/*
   16.14 + * Set of CPUs that remote admin software will allow us to bring online.
   16.15 + * Notified to us via xenbus.
   16.16 + */
   16.17 +static cpumask_t xenbus_allowed_cpumask;
   16.18 +
   16.19 +/* Set of CPUs that local admin will allow us to bring online. */
   16.20 +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
   16.21 +
   16.22 +static int local_cpu_hotplug_request(void)
   16.23 +{
   16.24 +	/*
   16.25 +	 * We assume a CPU hotplug request comes from local admin if it is made
   16.26 +	 * via a userspace process (i.e., one with a real mm_struct).
   16.27 +	 */
   16.28 +	return (current->mm != NULL);
   16.29 +}
   16.30 +
   16.31 +static void vcpu_hotplug(unsigned int cpu)
   16.32 +{
   16.33 +	int err;
   16.34 +	char dir[32], state[32];
   16.35 +
   16.36 +	if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
   16.37 +		return;
   16.38 +
   16.39 +	sprintf(dir, "cpu/%d", cpu);
   16.40 +	err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
   16.41 +	if (err != 1) {
   16.42 +		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
   16.43 +		return;
   16.44 +	}
   16.45 +
   16.46 +	if (strcmp(state, "online") == 0) {
   16.47 +		cpu_set(cpu, xenbus_allowed_cpumask);
   16.48 +		(void)cpu_up(cpu);
   16.49 +	} else if (strcmp(state, "offline") == 0) {
   16.50 +		cpu_clear(cpu, xenbus_allowed_cpumask);
   16.51 +		(void)cpu_down(cpu);
   16.52 +	} else {
   16.53 +		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
   16.54 +		       state, cpu);
   16.55 +	}
   16.56 +}
   16.57 +
   16.58 +static void handle_vcpu_hotplug_event(
   16.59 +	struct xenbus_watch *watch, const char **vec, unsigned int len)
   16.60 +{
   16.61 +	int cpu;
   16.62 +	char *cpustr;
   16.63 +	const char *node = vec[XS_WATCH_PATH];
   16.64 +
   16.65 +	if ((cpustr = strstr(node, "cpu/")) != NULL) {
   16.66 +		sscanf(cpustr, "cpu/%d", &cpu);
   16.67 +		vcpu_hotplug(cpu);
   16.68 +	}
   16.69 +}
   16.70 +
   16.71 +static int smpboot_cpu_notify(struct notifier_block *notifier,
   16.72 +			      unsigned long action, void *hcpu)
   16.73 +{
   16.74 +	int cpu = (long)hcpu;
   16.75 +
   16.76 +	/*
   16.77 +	 * We do this in a callback notifier rather than __cpu_disable()
   16.78 +	 * because local_cpu_hotplug_request() does not work in the latter
   16.79 +	 * as it's always executed from within a stopmachine kthread.
   16.80 +	 */
   16.81 +	if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
   16.82 +		cpu_clear(cpu, local_allowed_cpumask);
   16.83 +
   16.84 +	return NOTIFY_OK;
   16.85 +}
   16.86 +
   16.87 +static int setup_cpu_watcher(struct notifier_block *notifier,
   16.88 +			      unsigned long event, void *data)
   16.89 +{
   16.90 +	int i;
   16.91 +
   16.92 +	static struct xenbus_watch cpu_watch = {
   16.93 +		.node = "cpu",
   16.94 +		.callback = handle_vcpu_hotplug_event,
   16.95 +		.flags = XBWF_new_thread };
   16.96 +	(void)register_xenbus_watch(&cpu_watch);
   16.97 +
   16.98 +	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
   16.99 +		for_each_cpu(i)
  16.100 +			vcpu_hotplug(i);
  16.101 +		printk(KERN_INFO "Brought up %ld CPUs\n",
  16.102 +		       (long)num_online_cpus());
  16.103 +	}
  16.104 +
  16.105 +	return NOTIFY_DONE;
  16.106 +}
  16.107 +
  16.108 +static int __init setup_vcpu_hotplug_event(void)
  16.109 +{
  16.110 +	static struct notifier_block hotplug_cpu = {
  16.111 +		.notifier_call = smpboot_cpu_notify };
  16.112 +	static struct notifier_block xsn_cpu = {
  16.113 +		.notifier_call = setup_cpu_watcher };
  16.114 +
  16.115 +	register_cpu_notifier(&hotplug_cpu);
  16.116 +	register_xenstore_notifier(&xsn_cpu);
  16.117 +
  16.118 +	return 0;
  16.119 +}
  16.120 +
  16.121 +arch_initcall(setup_vcpu_hotplug_event);
  16.122 +
  16.123 +int smp_suspend(void)
  16.124 +{
  16.125 +	int i, err;
  16.126 +
  16.127 +	lock_cpu_hotplug();
  16.128 +
  16.129 +	/*
  16.130 +	 * Take all other CPUs offline. We hold the hotplug mutex to
  16.131 +	 * avoid other processes bringing up CPUs under our feet.
  16.132 +	 */
  16.133 +	while (num_online_cpus() > 1) {
  16.134 +		unlock_cpu_hotplug();
  16.135 +		for_each_online_cpu(i) {
  16.136 +			if (i == 0)
  16.137 +				continue;
  16.138 +			err = cpu_down(i);
  16.139 +			if (err) {
  16.140 +				printk(KERN_CRIT "Failed to take all CPUs "
  16.141 +				       "down: %d.\n", err);
  16.142 +				for_each_cpu(i)
  16.143 +					vcpu_hotplug(i);
  16.144 +				return err;
  16.145 +			}
  16.146 +		}
  16.147 +		lock_cpu_hotplug();
  16.148 +	}
  16.149 +
  16.150 +	return 0;
  16.151 +}
  16.152 +
  16.153 +void smp_resume(void)
  16.154 +{
  16.155 +	int cpu;
  16.156 +
  16.157 +	for_each_cpu(cpu)
  16.158 +		cpu_initialize_context(cpu);
  16.159 +
  16.160 +	unlock_cpu_hotplug();
  16.161 +
  16.162 +	for_each_cpu(cpu)
  16.163 +		vcpu_hotplug(cpu);
  16.164 +}
  16.165 +
  16.166 +int cpu_up_is_allowed(unsigned int cpu)
  16.167 +{
  16.168 +	int rc = 0;
  16.169 +
  16.170 +	if (local_cpu_hotplug_request()) {
  16.171 +		cpu_set(cpu, local_allowed_cpumask);
  16.172 +		if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
  16.173 +			printk("%s: attempt to bring up CPU %u disallowed by "
  16.174 +			       "remote admin.\n", __FUNCTION__, cpu);
  16.175 +			rc = -EBUSY;
  16.176 +		}
  16.177 +	} else if (!cpu_isset(cpu, local_allowed_cpumask) ||
  16.178 +		   !cpu_isset(cpu, xenbus_allowed_cpumask)) {
  16.179 +		rc = -EBUSY;
  16.180 +	}
  16.181 +
  16.182 +	return rc;
  16.183 +}
  16.184 +
  16.185 +void init_xenbus_allowed_cpumask(void)
  16.186 +{
  16.187 +	xenbus_allowed_cpumask = cpu_present_map;
  16.188 +}
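
The new cpu_hotplug.c gathers the VCPU hotplug machinery in one place. A xenstore watch on the cpu subtree fires handle_vcpu_hotplug_event(), which parses the CPU index out of the watched path and reads cpu/N/availability; the values "online" and "offline" drive cpu_up() and cpu_down(). Two cpumasks gate what may come online: xenbus_allowed_cpumask tracks what the remote administrator permits via xenstore, local_allowed_cpumask what the local administrator requests, and a request counts as local when current->mm != NULL, i.e. it arrived from a userspace process rather than a kernel thread. The watch-registration pattern in isolation, with hypothetical names:

    /* The callback fires whenever a node under .node changes;
     * vec[XS_WATCH_PATH] carries the path that triggered it. */
    static void example_changed(struct xenbus_watch *watch,
                                const char **vec, unsigned int len)
    {
            const char *node = vec[XS_WATCH_PATH];

            printk(KERN_INFO "xenstore node %s changed\n", node);
    }

    static struct xenbus_watch example_watch = {
            .node     = "example",
            .callback = example_changed,
    };

    /* at init: (void)register_xenbus_watch(&example_watch); */
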
    17.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Thu May 25 15:59:18 2006 -0600
    17.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Fri May 26 13:41:49 2006 -0600
    17.3 @@ -51,10 +51,10 @@
    17.4   * This lock protects updates to the following mapping and reference-count
    17.5   * arrays. The lock does not need to be acquired to read the mapping tables.
    17.6   */
    17.7 -static spinlock_t irq_mapping_update_lock;
    17.8 +static DEFINE_SPINLOCK(irq_mapping_update_lock);
    17.9  
   17.10  /* IRQ <-> event-channel mappings. */
   17.11 -static int evtchn_to_irq[NR_EVENT_CHANNELS];
   17.12 +static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = -1};
   17.13  
   17.14  /* Packed IRQ information: binding type, sub-type index, and event channel. */
   17.15  static u32 irq_info[NR_IRQS];
   17.16 @@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
   17.17  }
   17.18  
   17.19  /* IRQ <-> VIRQ mapping. */
   17.20 -DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
   17.21 +DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
   17.22  
   17.23  /* IRQ <-> IPI mapping. */
   17.24  #ifndef NR_IPIS
   17.25  #define NR_IPIS 1
   17.26  #endif
   17.27 -DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
   17.28 +DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
   17.29  
   17.30  /* Reference counts for bindings to IRQs. */
   17.31  static int irq_bindcount[NR_IRQS];
   17.32 @@ -751,7 +751,9 @@ void irq_resume(void)
   17.33  		BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
   17.34  
   17.35  	/* Secondary CPUs must have no VIRQ or IPI bindings. */
   17.36 -	for (cpu = 1; cpu < NR_CPUS; cpu++) {
   17.37 +	for_each_possible_cpu(cpu) {
   17.38 +		if (cpu == 0)
   17.39 +			continue;
   17.40  		for (virq = 0; virq < NR_VIRQS; virq++)
   17.41  			BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
   17.42  		for (ipi = 0; ipi < NR_IPIS; ipi++)
   17.43 @@ -813,25 +815,12 @@ void irq_resume(void)
   17.44  void __init xen_init_IRQ(void)
   17.45  {
   17.46  	int i;
   17.47 -	int cpu;
   17.48 -
   17.49 -	spin_lock_init(&irq_mapping_update_lock);
   17.50  
   17.51  	init_evtchn_cpu_bindings();
   17.52  
   17.53 -	/* No VIRQ or IPI bindings. */
   17.54 -	for (cpu = 0; cpu < NR_CPUS; cpu++) {
   17.55 -		for (i = 0; i < NR_VIRQS; i++)
   17.56 -			per_cpu(virq_to_irq, cpu)[i] = -1;
   17.57 -		for (i = 0; i < NR_IPIS; i++)
   17.58 -			per_cpu(ipi_to_irq, cpu)[i] = -1;
   17.59 -	}
   17.60 -
   17.61 -	/* No event-channel -> IRQ mappings. */
   17.62 -	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
   17.63 -		evtchn_to_irq[i] = -1;
   17.64 -		mask_evtchn(i); /* No event channels are 'live' right now. */
   17.65 -	}
   17.66 +	/* No event channels are 'live' right now. */
   17.67 +	for (i = 0; i < NR_EVENT_CHANNELS; i++)
   17.68 +		mask_evtchn(i);
   17.69  
   17.70  	/* No IRQ -> event-channel mappings. */
   17.71  	for (i = 0; i < NR_IRQS; i++)
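
Most of the churn in evtchn.c trades runtime initialisation for compile-time initialisation: GCC's designated range initialisers pre-fill the mapping arrays with -1 ("unbound"), so xen_init_IRQ() no longer needs its per-CPU loops, and DEFINE_SPINLOCK() removes the spin_lock_init() call. The resume path also switches from a raw 1..NR_CPUS loop to for_each_possible_cpu(), skipping CPU slots that can never exist. The initialiser extension in isolation:

    /* GNU C range designator: every element is set at compile
     * time, so no runtime init loop is required. */
    static int example_map[16] = { [0 ... 15] = -1 };
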
    18.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu May 25 15:59:18 2006 -0600
    18.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Fri May 26 13:41:49 2006 -0600
    18.3 @@ -38,7 +38,6 @@
    18.4  #include <linux/vmalloc.h>
    18.5  #include <asm/pgtable.h>
    18.6  #include <xen/interface/xen.h>
    18.7 -#include <asm/fixmap.h>
    18.8  #include <asm/uaccess.h>
    18.9  #include <xen/gnttab.h>
   18.10  #include <asm/synch_bitops.h>
   18.11 @@ -81,7 +80,7 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_t
   18.12  static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
   18.13  static int gnttab_free_count;
   18.14  static grant_ref_t gnttab_free_head;
   18.15 -static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
   18.16 +static DEFINE_SPINLOCK(gnttab_list_lock);
   18.17  
   18.18  static grant_entry_t *shared = NULL;
   18.19  
    19.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu May 25 15:59:18 2006 -0600
    19.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Fri May 26 13:41:49 2006 -0600
    19.3 @@ -17,6 +17,7 @@
    19.4  #include <linux/kthread.h>
    19.5  #include <xen/gnttab.h>
    19.6  #include <xen/xencons.h>
    19.7 +#include <xen/cpu_hotplug.h>
    19.8  
    19.9  #if defined(__i386__) || defined(__x86_64__)
   19.10  /*
   19.11 @@ -81,14 +82,6 @@ static int shutting_down = SHUTDOWN_INVA
   19.12  static void __shutdown_handler(void *unused);
   19.13  static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
   19.14  
   19.15 -#ifdef CONFIG_SMP
   19.16 -int  smp_suspend(void);
   19.17 -void smp_resume(void);
   19.18 -#else
   19.19 -#define smp_suspend()	(0)
   19.20 -#define smp_resume()	((void)0)
   19.21 -#endif
   19.22 -
   19.23  /* Ensure we run on the idle task page tables so that we will
   19.24     switch page tables before running user space. This is needed
   19.25     on architectures with separate kernel and user page tables
    20.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Thu May 25 15:59:18 2006 -0600
    20.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Fri May 26 13:41:49 2006 -0600
    20.3 @@ -23,6 +23,7 @@
    20.4  #include <asm/pgalloc.h>
    20.5  #include <xen/evtchn.h>
    20.6  #include <xen/interface/vcpu.h>
    20.7 +#include <xen/cpu_hotplug.h>
    20.8  #include <xen/xenbus.h>
    20.9  
   20.10  #ifdef CONFIG_SMP_ALTERNATIVES
   20.11 @@ -79,15 +80,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
   20.12  unsigned int maxcpus = NR_CPUS;
   20.13  #endif
   20.14  
   20.15 -/*
   20.16 - * Set of CPUs that remote admin software will allow us to bring online.
   20.17 - * Notified to us via xenbus.
   20.18 - */
   20.19 -static cpumask_t xenbus_allowed_cpumask;
   20.20 -
   20.21 -/* Set of CPUs that local admin will allow us to bring online. */
   20.22 -static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
   20.23 -
   20.24  void __init prefill_possible_map(void)
   20.25  {
   20.26  	int i, rc;
   20.27 @@ -167,17 +159,17 @@ static void cpu_bringup(void)
   20.28  	cpu_idle();
   20.29  }
   20.30  
   20.31 -static void vcpu_prepare(int vcpu)
   20.32 +void cpu_initialize_context(unsigned int cpu)
   20.33  {
   20.34  	vcpu_guest_context_t ctxt;
   20.35 -	struct task_struct *idle = idle_task(vcpu);
   20.36 +	struct task_struct *idle = idle_task(cpu);
   20.37  #ifdef __x86_64__
   20.38 -	struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
   20.39 +	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
   20.40  #else
   20.41 -	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
   20.42 +	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
   20.43  #endif
   20.44  
   20.45 -	if (vcpu == 0)
   20.46 +	if (cpu == 0)
   20.47  		return;
   20.48  
   20.49  	memset(&ctxt, 0, sizeof(ctxt));
   20.50 @@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
   20.51  
   20.52  	ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
   20.53  
   20.54 -	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
   20.55 +	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
   20.56  #endif
   20.57  
   20.58 -	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
   20.59 +	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
   20.60  }
   20.61  
   20.62  void __init smp_prepare_cpus(unsigned int max_cpus)
   20.63 @@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
   20.64  		cpu_set(cpu, cpu_present_map);
   20.65  #endif
   20.66  
   20.67 -		vcpu_prepare(cpu);
   20.68 +		cpu_initialize_context(cpu);
   20.69  	}
   20.70  
   20.71 -	xenbus_allowed_cpumask = cpu_present_map;
   20.72 +	init_xenbus_allowed_cpumask();
   20.73  
   20.74  	/* Currently, Xen gives no dynamic NUMA/HT info. */
   20.75  	for (cpu = 1; cpu < NR_CPUS; cpu++) {
   20.76 @@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
   20.77  	cpu_online_map   = cpumask_of_cpu(0);
   20.78  }
   20.79  
   20.80 -static int local_cpu_hotplug_request(void)
   20.81 -{
   20.82 -	/*
   20.83 -	 * We assume a CPU hotplug request comes from local admin if it is made
   20.84 -	 * via a userspace process (i.e., one with a real mm_struct).
   20.85 -	 */
   20.86 -	return (current->mm != NULL);
   20.87 -}
   20.88 -
   20.89  #ifdef CONFIG_HOTPLUG_CPU
   20.90  
   20.91  /*
   20.92 @@ -355,141 +338,6 @@ static int __init initialize_cpu_present
   20.93  }
   20.94  core_initcall(initialize_cpu_present_map);
   20.95  
   20.96 -static void vcpu_hotplug(unsigned int cpu)
   20.97 -{
   20.98 -	int err;
   20.99 -	char dir[32], state[32];
  20.100 -
  20.101 -	if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
  20.102 -		return;
  20.103 -
  20.104 -	sprintf(dir, "cpu/%d", cpu);
  20.105 -	err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
  20.106 -	if (err != 1) {
  20.107 -		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
  20.108 -		return;
  20.109 -	}
  20.110 -
  20.111 -	if (strcmp(state, "online") == 0) {
  20.112 -		cpu_set(cpu, xenbus_allowed_cpumask);
  20.113 -		(void)cpu_up(cpu);
  20.114 -	} else if (strcmp(state, "offline") == 0) {
  20.115 -		cpu_clear(cpu, xenbus_allowed_cpumask);
  20.116 -		(void)cpu_down(cpu);
  20.117 -	} else {
  20.118 -		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
  20.119 -		       state, cpu);
  20.120 -	}
  20.121 -}
  20.122 -
  20.123 -static void handle_vcpu_hotplug_event(
  20.124 -	struct xenbus_watch *watch, const char **vec, unsigned int len)
  20.125 -{
  20.126 -	int cpu;
  20.127 -	char *cpustr;
  20.128 -	const char *node = vec[XS_WATCH_PATH];
  20.129 -
  20.130 -	if ((cpustr = strstr(node, "cpu/")) != NULL) {
  20.131 -		sscanf(cpustr, "cpu/%d", &cpu);
  20.132 -		vcpu_hotplug(cpu);
  20.133 -	}
  20.134 -}
  20.135 -
  20.136 -static int smpboot_cpu_notify(struct notifier_block *notifier,
  20.137 -			      unsigned long action, void *hcpu)
  20.138 -{
  20.139 -	int cpu = (long)hcpu;
  20.140 -
  20.141 -	/*
  20.142 -	 * We do this in a callback notifier rather than __cpu_disable()
  20.143 -	 * because local_cpu_hotplug_request() does not work in the latter
  20.144 -	 * as it's always executed from within a stopmachine kthread.
  20.145 -	 */
  20.146 -	if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
  20.147 -		cpu_clear(cpu, local_allowed_cpumask);
  20.148 -
  20.149 -	return NOTIFY_OK;
  20.150 -}
  20.151 -
  20.152 -static int setup_cpu_watcher(struct notifier_block *notifier,
  20.153 -			      unsigned long event, void *data)
  20.154 -{
  20.155 -	int i;
  20.156 -
  20.157 -	static struct xenbus_watch cpu_watch = {
  20.158 -		.node = "cpu",
  20.159 -		.callback = handle_vcpu_hotplug_event,
  20.160 -		.flags = XBWF_new_thread };
  20.161 -	(void)register_xenbus_watch(&cpu_watch);
  20.162 -
  20.163 -	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
  20.164 -		for_each_cpu(i)
  20.165 -			vcpu_hotplug(i);
  20.166 -		printk(KERN_INFO "Brought up %ld CPUs\n",
  20.167 -		       (long)num_online_cpus());
  20.168 -	}
  20.169 -
  20.170 -	return NOTIFY_DONE;
  20.171 -}
  20.172 -
  20.173 -static int __init setup_vcpu_hotplug_event(void)
  20.174 -{
  20.175 -	static struct notifier_block hotplug_cpu = {
  20.176 -		.notifier_call = smpboot_cpu_notify };
  20.177 -	static struct notifier_block xsn_cpu = {
  20.178 -		.notifier_call = setup_cpu_watcher };
  20.179 -
  20.180 -	register_cpu_notifier(&hotplug_cpu);
  20.181 -	register_xenstore_notifier(&xsn_cpu);
  20.182 -
  20.183 -	return 0;
  20.184 -}
  20.185 -
  20.186 -arch_initcall(setup_vcpu_hotplug_event);
  20.187 -
  20.188 -int smp_suspend(void)
  20.189 -{
  20.190 -	int i, err;
  20.191 -
  20.192 -	lock_cpu_hotplug();
  20.193 -
  20.194 -	/*
  20.195 -	 * Take all other CPUs offline. We hold the hotplug mutex to
  20.196 -	 * avoid other processes bringing up CPUs under our feet.
  20.197 -	 */
  20.198 -	while (num_online_cpus() > 1) {
  20.199 -		unlock_cpu_hotplug();
  20.200 -		for_each_online_cpu(i) {
  20.201 -			if (i == 0)
  20.202 -				continue;
  20.203 -			err = cpu_down(i);
  20.204 -			if (err) {
  20.205 -				printk(KERN_CRIT "Failed to take all CPUs "
  20.206 -				       "down: %d.\n", err);
  20.207 -				for_each_cpu(i)
  20.208 -					vcpu_hotplug(i);
  20.209 -				return err;
  20.210 -			}
  20.211 -		}
  20.212 -		lock_cpu_hotplug();
  20.213 -	}
  20.214 -
  20.215 -	return 0;
  20.216 -}
  20.217 -
  20.218 -void smp_resume(void)
  20.219 -{
  20.220 -	int i;
  20.221 -
  20.222 -	for_each_cpu(i)
  20.223 -		vcpu_prepare(i);
  20.224 -
  20.225 -	unlock_cpu_hotplug();
  20.226 -
  20.227 -	for_each_cpu(i)
  20.228 -		vcpu_hotplug(i);
  20.229 -}
  20.230 -
  20.231  static void
  20.232  remove_siblinginfo(int cpu)
  20.233  {
  20.234 @@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
  20.235  
  20.236  #else /* !CONFIG_HOTPLUG_CPU */
  20.237  
  20.238 -int smp_suspend(void)
  20.239 -{
  20.240 -	if (num_online_cpus() > 1) {
  20.241 -		printk(KERN_WARNING "Can't suspend SMP guests "
  20.242 -		       "without CONFIG_HOTPLUG_CPU\n");
  20.243 -		return -EOPNOTSUPP;
  20.244 -	}
  20.245 -	return 0;
  20.246 -}
  20.247 -
  20.248 -void smp_resume(void)
  20.249 -{
  20.250 -}
  20.251 -
  20.252  int __cpu_disable(void)
  20.253  {
  20.254  	return -ENOSYS;
  20.255 @@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
  20.256  {
  20.257  	int rc;
  20.258  
  20.259 -	if (local_cpu_hotplug_request()) {
  20.260 -		cpu_set(cpu, local_allowed_cpumask);
  20.261 -		if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
  20.262 -			printk("%s: attempt to bring up CPU %u disallowed by "
  20.263 -			       "remote admin.\n", __FUNCTION__, cpu);
  20.264 -			return -EBUSY;
  20.265 -		}
  20.266 -	} else if (!cpu_isset(cpu, local_allowed_cpumask) ||
  20.267 -		   !cpu_isset(cpu, xenbus_allowed_cpumask)) {
  20.268 -		return -EBUSY;
  20.269 -	}
  20.270 +	rc = cpu_up_is_allowed(cpu);
  20.271 +	if (rc)
  20.272 +		return rc;
  20.273  
  20.274  #ifdef CONFIG_SMP_ALTERNATIVES
  20.275  	if (num_online_cpus() == 1)
  20.276 @@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
  20.277  	cpu_set(cpu, cpu_online_map);
  20.278  
  20.279  	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
  20.280 -	if (rc != 0)
  20.281 -		BUG();
  20.282 +	BUG_ON(rc);
  20.283  
  20.284  	return 0;
  20.285  }
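
smpboot.c sheds everything that moved into cpu_hotplug.c, and what stays is renamed for reuse: vcpu_prepare() becomes cpu_initialize_context() (its vcpu parameter becoming cpu) so that smp_resume() can rebuild VCPU contexts, and __cpu_up() delegates its policy check to cpu_up_is_allowed() instead of open-coding the two-cpumask test. The gatekeeping flow, condensed from the hunk above:

    rc = cpu_up_is_allowed(cpu);
    if (rc)
            return rc;      /* vetoed by local or remote admin */

    /* ... GDT/context setup elided ... */

    rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
    BUG_ON(rc);             /* BUG_ON(x) is the idiomatic "if (x) BUG();" */
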
    21.1 --- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c	Thu May 25 15:59:18 2006 -0600
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,58 +0,0 @@
    21.4 -/*****************************************************************************
    21.5 - *
    21.6 - * Utility functions for Xen network devices.
    21.7 - *
    21.8 - * Copyright (c) 2005 XenSource Ltd.
    21.9 - * 
   21.10 - * This program is free software; you can redistribute it and/or
   21.11 - * modify it under the terms of the GNU General Public License version 2
   21.12 - * as published by the Free Software Foundation; or, when distributed
   21.13 - * separately from the Linux kernel or incorporated into other
   21.14 - * software packages, subject to the following license:
   21.15 - * 
   21.16 - * Permission is hereby granted, free of charge, to any person obtaining a
   21.17 - * copy of this source file (the "Software"), to deal in the Software without
   21.18 - * restriction, including without limitation the rights to use, copy, modify,
   21.19 - * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   21.20 - * and to permit persons to whom the Software is furnished to do so, subject
   21.21 - * to the following conditions:
   21.22 - * 
   21.23 - * The above copyright notice and this permission notice shall be included in
   21.24 - * all copies or substantial portions of the Software.
   21.25 - * 
   21.26 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   21.27 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   21.28 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   21.29 - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   21.30 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   21.31 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   21.32 - * DEALINGS IN THE SOFTWARE.
   21.33 - */
   21.34 -
   21.35 -#include <linux/if_ether.h>
   21.36 -#include <linux/err.h>
   21.37 -#include <linux/module.h>
   21.38 -#include <xen/net_driver_util.h>
   21.39 -
   21.40 -
   21.41 -int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
   21.42 -{
   21.43 -	char *s;
   21.44 -	int i;
   21.45 -	char *e;
   21.46 -	char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
   21.47 -	if (IS_ERR(macstr))
   21.48 -		return PTR_ERR(macstr);
   21.49 -	s = macstr;
   21.50 -	for (i = 0; i < ETH_ALEN; i++) {
   21.51 -		mac[i] = simple_strtoul(s, &e, 16);
   21.52 -		if (s == e || (e[0] != ':' && e[0] != 0)) {
   21.53 -			kfree(macstr);
   21.54 -			return -ENOENT;
   21.55 -		}
   21.56 -		s = &e[1];
   21.57 -	}
   21.58 -	kfree(macstr);
   21.59 -	return 0;
   21.60 -}
   21.61 -EXPORT_SYMBOL_GPL(xen_net_read_mac);
    22.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu May 25 15:59:18 2006 -0600
    22.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri May 26 13:41:49 2006 -0600
    22.3 @@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
    22.4  #define MAX_MFN_ALLOC 64
    22.5  static unsigned long mfn_list[MAX_MFN_ALLOC];
    22.6  static unsigned int alloc_index = 0;
    22.7 -static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
    22.8 +static DEFINE_SPINLOCK(mfn_lock);
    22.9  
   22.10  static unsigned long alloc_mfn(void)
   22.11  {
   22.12 @@ -691,7 +691,7 @@ static void net_tx_action(unsigned long 
   22.13  
   22.14  static void netif_idx_release(u16 pending_idx)
   22.15  {
   22.16 -	static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
   22.17 +	static DEFINE_SPINLOCK(_lock);
   22.18  	unsigned long flags;
   22.19  
   22.20  	spin_lock_irqsave(&_lock, flags);
    23.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu May 25 15:59:18 2006 -0600
    23.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Fri May 26 13:41:49 2006 -0600
    23.3 @@ -17,37 +17,31 @@
    23.4      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    23.5  */
    23.6  
    23.7 -
    23.8  #include <stdarg.h>
    23.9  #include <linux/module.h>
   23.10  #include <xen/xenbus.h>
   23.11 -#include <xen/net_driver_util.h>
   23.12  #include "common.h"
   23.13  
   23.14 -
   23.15  #if 0
   23.16  #undef DPRINTK
   23.17  #define DPRINTK(fmt, args...) \
   23.18      printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
   23.19  #endif
   23.20  
   23.21 -
   23.22  struct backend_info
   23.23  {
   23.24  	struct xenbus_device *dev;
   23.25  	netif_t *netif;
   23.26  	struct xenbus_watch backend_watch;
   23.27 -	XenbusState frontend_state;
   23.28 +	enum xenbus_state frontend_state;
   23.29  };
   23.30  
   23.31 -
   23.32  static int connect_rings(struct backend_info *);
   23.33  static void connect(struct backend_info *);
   23.34  static void maybe_connect(struct backend_info *);
   23.35  static void backend_changed(struct xenbus_watch *, const char **,
   23.36  			    unsigned int);
   23.37  
   23.38 -
   23.39  static int netback_remove(struct xenbus_device *dev)
   23.40  {
   23.41  	struct backend_info *be = dev->data;
   23.42 @@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
   23.43   * Callback received when the frontend's state changes.
   23.44   */
   23.45  static void frontend_changed(struct xenbus_device *dev,
   23.46 -			     XenbusState frontend_state)
   23.47 +			     enum xenbus_state frontend_state)
   23.48  {
   23.49  	struct backend_info *be = dev->data;
   23.50  
   23.51 @@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
   23.52  	kfree(ratestr);
   23.53  }
   23.54  
   23.55 +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
   23.56 +{
   23.57 +	char *s, *e, *macstr;
   23.58 +	int i;
   23.59 +
   23.60 +	macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
   23.61 +	if (IS_ERR(macstr))
   23.62 +		return PTR_ERR(macstr);
   23.63 +
   23.64 +	for (i = 0; i < ETH_ALEN; i++) {
   23.65 +		mac[i] = simple_strtoul(s, &e, 16);
   23.66 +		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
   23.67 +			kfree(macstr);
   23.68 +			return -ENOENT;
   23.69 +		}
   23.70 +		s = e+1;
   23.71 +	}
   23.72 +
   23.73 +	kfree(macstr);
   23.74 +	return 0;
   23.75 +}
   23.76  
   23.77  static void connect(struct backend_info *be)
   23.78  {
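
With drivers/xen/net_driver_util.c deleted above, netback and netfront each carry a private static copy of xen_net_read_mac(). The new parser is stricter than the old shared one: each octet must be followed by ':' except the last, which must be followed by the terminating NUL, so truncated strings such as "00:16:3e" are rejected rather than parsed past the end of the buffer. A user-space model, assuming strtoul from <stdlib.h> in place of the kernel's simple_strtoul():

    #include <stdlib.h>

    #define ETH_ALEN 6

    /* Parse "aa:bb:cc:dd:ee:ff" into mac[]; returns 0 on
     * success, -1 on any malformed input. */
    static int parse_mac(const char *s, unsigned char mac[ETH_ALEN])
    {
            char *e;
            int i;

            for (i = 0; i < ETH_ALEN; i++) {
                    mac[i] = (unsigned char)strtoul(s, &e, 16);
                    if (s == e || *e != ((i == ETH_ALEN - 1) ? '\0' : ':'))
                            return -1;
                    s = e + 1;
            }
            return 0;
    }

    /* parse_mac("00:16:3e:2a:b4:01", mac) succeeds;
     * parse_mac("00:16:3e", mac) fails at i == 2. */
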
    24.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu May 25 15:59:18 2006 -0600
    24.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri May 26 13:41:49 2006 -0600
    24.3 @@ -60,7 +60,6 @@
    24.4  #include <asm/uaccess.h>
    24.5  #include <xen/interface/grant_table.h>
    24.6  #include <xen/gnttab.h>
    24.7 -#include <xen/net_driver_util.h>
    24.8  
    24.9  #define GRANT_INVALID_REF	0
   24.10  
   24.11 @@ -89,12 +88,6 @@ struct netfront_info {
   24.12  	unsigned int handle;
   24.13  	unsigned int evtchn, irq;
   24.14  
   24.15 -	/* What is the status of our connection to the remote backend? */
   24.16 -#define BEST_CLOSED       0
   24.17 -#define BEST_DISCONNECTED 1
   24.18 -#define BEST_CONNECTED    2
   24.19 -	unsigned int backend_state;
   24.20 -
   24.21  	/* Receive-ring batched refills. */
   24.22  #define RX_MIN_TARGET 8
   24.23  #define RX_DFL_MIN_TARGET 64
   24.24 @@ -144,14 +137,6 @@ static inline unsigned short get_id_from
   24.25  	return id;
   24.26  }
   24.27  
   24.28 -#ifdef DEBUG
   24.29 -static const char *be_state_name[] = {
   24.30 -	[BEST_CLOSED]       = "closed",
   24.31 -	[BEST_DISCONNECTED] = "disconnected",
   24.32 -	[BEST_CONNECTED]    = "connected",
   24.33 -};
   24.34 -#endif
   24.35 -
   24.36  #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
   24.37                                         __FUNCTION__, __LINE__, ##args)
   24.38  #define IPRINTK(fmt, args...)				\
   24.39 @@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
   24.40  	return talk_to_backend(dev, info);
   24.41  }
   24.42  
   24.43 +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
   24.44 +{
   24.45 +	char *s, *e, *macstr;
   24.46 +	int i;
   24.47 +
   24.48 +	macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
   24.49 +	if (IS_ERR(macstr))
   24.50 +		return PTR_ERR(macstr);
   24.51 +
   24.52 +	for (i = 0; i < ETH_ALEN; i++) {
   24.53 +		mac[i] = simple_strtoul(s, &e, 16);
   24.54 +		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
   24.55 +			kfree(macstr);
   24.56 +			return -ENOENT;
   24.57 +		}
   24.58 +		s = e+1;
   24.59 +	}
   24.60 +
   24.61 +	kfree(macstr);
   24.62 +	return 0;
   24.63 +}
   24.64  
   24.65  /* Common code used when first setting up, and when resuming. */
   24.66  static int talk_to_backend(struct xenbus_device *dev,
   24.67 @@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
   24.68  	}
   24.69  	memset(txs, 0, PAGE_SIZE);
   24.70  	memset(rxs, 0, PAGE_SIZE);
   24.71 -	info->backend_state = BEST_DISCONNECTED;
   24.72  
   24.73  	SHARED_RING_INIT(txs);
   24.74  	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
   24.75 @@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
   24.76   * Callback received when the backend's state changes.
   24.77   */
   24.78  static void backend_changed(struct xenbus_device *dev,
   24.79 -			    XenbusState backend_state)
   24.80 +			    enum xenbus_state backend_state)
   24.81  {
   24.82  	DPRINTK("\n");
   24.83  
   24.84 @@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
   24.85  	struct netfront_info *np = netdev_priv(dev);
   24.86  	struct sk_buff *skb;
   24.87  
   24.88 -	if (np->backend_state != BEST_CONNECTED)
   24.89 +	if (unlikely(!netif_carrier_ok(dev)))
   24.90  		return;
   24.91  
   24.92  	do {
   24.93 @@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
   24.94  	struct xen_memory_reservation reservation;
   24.95  	grant_ref_t ref;
   24.96  
   24.97 -	if (unlikely(np->backend_state != BEST_CONNECTED))
   24.98 +	if (unlikely(!netif_carrier_ok(dev)))
   24.99  		return;
  24.100  
  24.101  	/*
  24.102 @@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
  24.103  
  24.104  	spin_lock_irq(&np->tx_lock);
  24.105  
  24.106 -	if (np->backend_state != BEST_CONNECTED) {
  24.107 +	if (unlikely(!netif_carrier_ok(dev))) {
  24.108  		spin_unlock_irq(&np->tx_lock);
  24.109  		goto drop;
  24.110  	}
  24.111 @@ -748,7 +753,7 @@ static int netif_poll(struct net_device 
  24.112  
  24.113  	spin_lock(&np->rx_lock);
  24.114  
  24.115 -	if (np->backend_state != BEST_CONNECTED) {
  24.116 +	if (unlikely(!netif_carrier_ok(dev))) {
  24.117  		spin_unlock(&np->rx_lock);
  24.118  		return 0;
  24.119  	}
  24.120 @@ -1041,7 +1046,7 @@ static void network_connect(struct net_d
  24.121  	 * domain a kick because we've probably just requeued some
  24.122  	 * packets.
  24.123  	 */
  24.124 -	np->backend_state = BEST_CONNECTED;
  24.125 +	netif_carrier_on(dev);
  24.126  	notify_remote_via_irq(np->irq);
  24.127  	network_tx_buf_gc(dev);
  24.128  
  24.129 @@ -1055,7 +1060,7 @@ static void show_device(struct netfront_
  24.130  	if (np) {
  24.131  		IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
  24.132  			np->handle,
  24.133 -			be_state_name[np->backend_state],
  24.134 +			netif_carrier_ok(np->netdev) ? "on" : "off",
  24.135  			netif_running(np->netdev) ? "open" : "closed",
  24.136  			np->evtchn,
  24.137  			np->tx,
  24.138 @@ -1241,10 +1246,11 @@ static struct net_device * __devinit cre
  24.139  	}
  24.140  
  24.141  	np                = netdev_priv(netdev);
  24.142 -	np->backend_state = BEST_CLOSED;
  24.143  	np->handle        = handle;
  24.144  	np->xbdev         = dev;
  24.145  
  24.146 +	netif_carrier_off(netdev);
  24.147 +
  24.148  	spin_lock_init(&np->tx_lock);
  24.149  	spin_lock_init(&np->rx_lock);
  24.150  
  24.151 @@ -1392,7 +1398,7 @@ static void netif_disconnect_backend(str
  24.152  	/* Stop old i/f to prevent errors whilst we rebuild the state. */
  24.153  	spin_lock_irq(&info->tx_lock);
  24.154  	spin_lock(&info->rx_lock);
  24.155 -	info->backend_state = BEST_DISCONNECTED;
  24.156 +	netif_carrier_off(info->netdev);
  24.157  	spin_unlock(&info->rx_lock);
  24.158  	spin_unlock_irq(&info->tx_lock);
  24.159  
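
netfront drops its private BEST_CLOSED/BEST_DISCONNECTED/BEST_CONNECTED tri-state and keys everything off the standard net-device carrier flag: netif_carrier_on()/netif_carrier_off() replace the state assignments, and the data paths test netif_carrier_ok(), wrapped in unlikely() since a down link is the rare case. Using the stock flag also means the core network stack and userspace tools see the link state without any driver-private plumbing. The hot-path guard in isolation:

    /* As used in network_tx_buf_gc(), network_alloc_rx_buffers(),
     * network_start_xmit() and netif_poll() above: */
    if (unlikely(!netif_carrier_ok(dev)))
            return;         /* link down: nothing to do */
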
    25.1 --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Thu May 25 15:59:18 2006 -0600
    25.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Fri May 26 13:41:49 2006 -0600
    25.3 @@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
    25.4  }
    25.5  
    25.6  static void pciback_frontend_changed(struct xenbus_device *xdev,
    25.7 -				     XenbusState fe_state)
    25.8 +				     enum xenbus_state fe_state)
    25.9  {
   25.10  	struct pciback_device *pdev = xdev->data;
   25.11  
    26.1 --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Thu May 25 15:59:18 2006 -0600
    26.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Fri May 26 13:41:49 2006 -0600
    26.3 @@ -196,7 +196,7 @@ static int pcifront_try_connect(struct p
    26.4  static int pcifront_try_disconnect(struct pcifront_device *pdev)
    26.5  {
    26.6  	int err = 0;
    26.7 -	XenbusState prev_state;
    26.8 +	enum xenbus_state prev_state;
    26.9  
   26.10  	spin_lock(&pdev->dev_lock);
   26.11  
   26.12 @@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
   26.13  }
   26.14  
   26.15  static void pcifront_backend_changed(struct xenbus_device *xdev,
   26.16 -				     XenbusState be_state)
   26.17 +				     enum xenbus_state be_state)
   26.18  {
   26.19  	struct pcifront_device *pdev = xdev->data;
   26.20  
    27.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu May 25 15:59:18 2006 -0600
    27.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri May 26 13:41:49 2006 -0600
    27.3 @@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
    27.4  	break;
    27.5  
    27.6  	case IOCTL_PRIVCMD_MMAPBATCH: {
    27.7 -#ifndef __ia64__
    27.8 -		mmu_update_t u;
    27.9 -		uint64_t ptep;
   27.10 -#endif
   27.11  		privcmd_mmapbatch_t m;
   27.12  		struct vm_area_struct *vma = NULL;
   27.13  		unsigned long __user *p;
   27.14 @@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
   27.15  		for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
   27.16  			if (get_user(mfn, p))
   27.17  				return -EFAULT;
   27.18 -#ifdef __ia64__
   27.19 +
   27.20  			ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
   27.21 -						     mfn, 1 << PAGE_SHIFT,
   27.22 +						     mfn, PAGE_SIZE,
   27.23  						     vma->vm_page_prot, m.dom);
   27.24  			if (ret < 0)
   27.25 -			    goto batch_err;
   27.26 -#else
   27.27 -
   27.28 -			ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
   27.29 -			if (ret)
   27.30 -				goto batch_err;
   27.31 -
   27.32 -			u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
   27.33 -			u.ptr = ptep;
   27.34 -
   27.35 -			if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
   27.36  				put_user(0xF0000000 | mfn, p);
   27.37 -#endif
   27.38  		}
   27.39  
   27.40  		ret = 0;
   27.41 @@ -283,6 +267,9 @@ static int __init privcmd_init(void)
   27.42  	set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
   27.43  	set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
   27.44  	set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
   27.45 +	set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
   27.46 +	set_bit(__HYPERVISOR_event_channel_op_compat,
   27.47 +		hypercall_permission_map);
   27.48  
   27.49  	privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   27.50  	if (privcmd_intf != NULL)
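
IOCTL_PRIVCMD_MMAPBATCH loses its #ifdef __ia64__ split: both architectures now go through direct_remap_pfn_range(), with the size spelled PAGE_SIZE rather than the equivalent 1 << PAGE_SHIFT, and a frame that fails to map is reported back in place by rewriting the user's entry to 0xF0000000 | mfn instead of aborting the whole batch. The initcall additionally whitelists the sched_op_compat and event_channel_op_compat hypercalls, so tools built against the older hypercall ABI can still get through /proc/xen/privcmd.
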
    28.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Thu May 25 15:59:18 2006 -0600
    28.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Fri May 26 13:41:49 2006 -0600
    28.3 @@ -34,7 +34,7 @@ struct backend_info
    28.4  
    28.5  	/* watch front end for changes */
    28.6  	struct xenbus_watch backend_watch;
    28.7 -	XenbusState frontend_state;
    28.8 +	enum xenbus_state frontend_state;
    28.9  };
   28.10  
   28.11  static void maybe_connect(struct backend_info *be);
   28.12 @@ -43,7 +43,7 @@ static int connect_ring(struct backend_i
   28.13  static void backend_changed(struct xenbus_watch *watch,
   28.14                              const char **vec, unsigned int len);
   28.15  static void frontend_changed(struct xenbus_device *dev,
   28.16 -                             XenbusState frontend_state);
   28.17 +                             enum xenbus_state frontend_state);
   28.18  
   28.19  static int tpmback_remove(struct xenbus_device *dev)
   28.20  {
   28.21 @@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
   28.22  
   28.23  
   28.24  static void frontend_changed(struct xenbus_device *dev,
   28.25 -                             XenbusState frontend_state)
   28.26 +                             enum xenbus_state frontend_state)
   28.27  {
   28.28  	struct backend_info *be = dev->data;
   28.29  	int err;
    29.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Thu May 25 15:59:18 2006 -0600
    29.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Fri May 26 13:41:49 2006 -0600
    29.3 @@ -84,7 +84,7 @@ int xenbus_watch_path2(struct xenbus_dev
    29.4  EXPORT_SYMBOL_GPL(xenbus_watch_path2);
    29.5  
    29.6  
    29.7 -int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
    29.8 +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
    29.9  {
   29.10  	/* We check whether the state is currently set to the given value, and
   29.11  	   if not, then the state is set.  We don't want to unconditionally
   29.12 @@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
   29.13  }
   29.14  
   29.15  
   29.16 -XenbusState xenbus_read_driver_state(const char *path)
   29.17 +enum xenbus_state xenbus_read_driver_state(const char *path)
   29.18  {
   29.19 -	XenbusState result;
   29.20 +	enum xenbus_state result;
   29.21  	int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
   29.22  	if (err)
   29.23  		result = XenbusStateClosed;
    30.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Thu May 25 15:59:18 2006 -0600
    30.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri May 26 13:41:49 2006 -0600
    30.3 @@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
    30.4  	struct xenbus_device *dev =
    30.5  		container_of(watch, struct xenbus_device, otherend_watch);
    30.6  	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
    30.7 -	XenbusState state;
    30.8 +	enum xenbus_state state;
    30.9  
   30.10  	/* Protect us against watches firing on old details when the otherend
   30.11  	   details change, say immediately after a resume. */
   30.12 @@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
   30.13  	size_t stringlen;
   30.14  	char *tmpstring;
   30.15  
   30.16 -	XenbusState state = xenbus_read_driver_state(nodename);
   30.17 +	enum xenbus_state state = xenbus_read_driver_state(nodename);
   30.18  
   30.19  	if (state != XenbusStateInitialising) {
   30.20  		/* Device is not new, so ignore it.  This can happen if a
    31.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h	Thu May 25 15:59:18 2006 -0600
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,63 +0,0 @@
    31.4 -/*
    31.5 - * structures and definitions for the int 15, ax=e820 memory map
    31.6 - * scheme.
    31.7 - *
    31.8 - * In a nutshell, setup.S populates a scratch table in the
    31.9 - * empty_zero_block that contains a list of usable address/size
   31.10 - * duples.  setup.c, this information is transferred into the e820map,
   31.11 - * and in init.c/numa.c, that new information is used to mark pages
   31.12 - * reserved or not.
   31.13 - */
   31.14 -#ifndef __E820_HEADER
   31.15 -#define __E820_HEADER
   31.16 -
   31.17 -#include <linux/mmzone.h>
   31.18 -
   31.19 -#define E820MAP	0x2d0		/* our map */
   31.20 -#define E820MAX	128		/* number of entries in E820MAP */
   31.21 -#define E820NR	0x1e8		/* # entries in E820MAP */
   31.22 -
   31.23 -#define E820_RAM	1
   31.24 -#define E820_RESERVED	2
   31.25 -#define E820_ACPI	3 /* usable as RAM once ACPI tables have been read */
   31.26 -#define E820_NVS	4
   31.27 -
   31.28 -#define HIGH_MEMORY	(1024*1024)
   31.29 -
   31.30 -#define LOWMEMSIZE()	(0x9f000)
   31.31 -
   31.32 -#ifndef __ASSEMBLY__
   31.33 -struct e820entry {
   31.34 -	u64 addr;	/* start of memory segment */
   31.35 -	u64 size;	/* size of memory segment */
   31.36 -	u32 type;	/* type of memory segment */
   31.37 -} __attribute__((packed));
   31.38 -
   31.39 -struct e820map {
   31.40 -    int nr_map;
   31.41 -	struct e820entry map[E820MAX];
   31.42 -};
   31.43 -
   31.44 -extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
   31.45 -				    unsigned size);
   31.46 -extern void add_memory_region(unsigned long start, unsigned long size, 
   31.47 -			      int type);
   31.48 -extern void setup_memory_region(void);
   31.49 -extern void contig_e820_setup(void); 
   31.50 -extern unsigned long e820_end_of_ram(void);
   31.51 -extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
   31.52 -extern void e820_print_map(char *who);
   31.53 -extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
   31.54 -
   31.55 -extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
   31.56 -extern void e820_setup_gap(struct e820entry *e820, int nr_map);
   31.57 -extern unsigned long e820_hole_size(unsigned long start_pfn,
   31.58 -				    unsigned long end_pfn);
   31.59 -
   31.60 -extern void __init parse_memopt(char *p, char **end);
   31.61 -extern void __init parse_memmapopt(char *p, char **end);
   31.62 -
   31.63 -extern struct e820map e820;
   31.64 -#endif/*!__ASSEMBLY__*/
   31.65 -
   31.66 -#endif/*__E820_HEADER*/
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h	Fri May 26 13:41:49 2006 -0600
    32.3 @@ -0,0 +1,63 @@
    32.4 +/*
    32.5 + * structures and definitions for the int 15, ax=e820 memory map
    32.6 + * scheme.
    32.7 + *
    32.8 + * In a nutshell, setup.S populates a scratch table in the
    32.9 + * empty_zero_block that contains a list of usable address/size
   32.10 + * duples.  setup.c, this information is transferred into the e820map,
   32.11 + * and in init.c/numa.c, that new information is used to mark pages
   32.12 + * reserved or not.
   32.13 + */
   32.14 +#ifndef __E820_HEADER
   32.15 +#define __E820_HEADER
   32.16 +
   32.17 +#include <linux/mmzone.h>
   32.18 +
   32.19 +#define E820MAP	0x2d0		/* our map */
   32.20 +#define E820MAX	128		/* number of entries in E820MAP */
   32.21 +#define E820NR	0x1e8		/* # entries in E820MAP */
   32.22 +
   32.23 +#define E820_RAM	1
   32.24 +#define E820_RESERVED	2
   32.25 +#define E820_ACPI	3 /* usable as RAM once ACPI tables have been read */
   32.26 +#define E820_NVS	4
   32.27 +
   32.28 +#define HIGH_MEMORY	(1024*1024)
   32.29 +
   32.30 +#define LOWMEMSIZE()	(0x9f000)
   32.31 +
   32.32 +#ifndef __ASSEMBLY__
   32.33 +struct e820entry {
   32.34 +	u64 addr;	/* start of memory segment */
   32.35 +	u64 size;	/* size of memory segment */
   32.36 +	u32 type;	/* type of memory segment */
   32.37 +} __attribute__((packed));
   32.38 +
   32.39 +struct e820map {
   32.40 +    int nr_map;
   32.41 +	struct e820entry map[E820MAX];
   32.42 +};
   32.43 +
   32.44 +extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
   32.45 +				    unsigned size);
   32.46 +extern void add_memory_region(unsigned long start, unsigned long size, 
   32.47 +			      int type);
   32.48 +extern void setup_memory_region(void);
   32.49 +extern void contig_e820_setup(void); 
   32.50 +extern unsigned long e820_end_of_ram(void);
   32.51 +extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
   32.52 +extern void e820_print_map(char *who);
   32.53 +extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
   32.54 +
   32.55 +extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
   32.56 +extern void e820_setup_gap(struct e820entry *e820, int nr_map);
   32.57 +extern unsigned long e820_hole_size(unsigned long start_pfn,
   32.58 +				    unsigned long end_pfn);
   32.59 +
   32.60 +extern void __init parse_memopt(char *p, char **end);
   32.61 +extern void __init parse_memmapopt(char *p, char **end);
   32.62 +
   32.63 +extern struct e820map e820;
   32.64 +#endif/*!__ASSEMBLY__*/
   32.65 +
   32.66 +#endif/*__E820_HEADER*/
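
The header above describes the e820 map as a counted array of (addr, size, type) entries. As a minimal sketch of a consumer — a hypothetical helper, not part of this changeset — totalling the usable RAM looks like this:

    /* Hypothetical example: total the usable RAM described by an e820map.
     * Relies only on the declarations in the header above. */
    static u64 e820_total_ram(const struct e820map *m)
    {
        u64 total = 0;
        int i;

        for (i = 0; i < m->nr_map; i++)
            if (m->map[i].type == E820_RAM)
                total += m->map[i].size;
        return total;
    }
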
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h	Fri May 26 13:41:49 2006 -0600
    33.3 @@ -0,0 +1,42 @@
    33.4 +#ifndef __XEN_CPU_HOTPLUG_H__
    33.5 +#define __XEN_CPU_HOTPLUG_H__
    33.6 +
    33.7 +#include <linux/config.h>
    33.8 +#include <linux/kernel.h>
    33.9 +#include <linux/cpumask.h>
   33.10 +
   33.11 +#if defined(CONFIG_HOTPLUG_CPU)
   33.12 +
   33.13 +#if defined(CONFIG_X86)
   33.14 +void cpu_initialize_context(unsigned int cpu);
   33.15 +#else
   33.16 +#define cpu_initialize_context(cpu)	((void)0)
   33.17 +#endif
   33.18 +
   33.19 +int cpu_up_is_allowed(unsigned int cpu);
   33.20 +void init_xenbus_allowed_cpumask(void);
   33.21 +int smp_suspend(void);
   33.22 +void smp_resume(void);
   33.23 +
   33.24 +#else /* !defined(CONFIG_HOTPLUG_CPU) */
   33.25 +
   33.26 +#define cpu_up_is_allowed(cpu)		(1)
   33.27 +#define init_xenbus_allowed_cpumask()	((void)0)
   33.28 +
   33.29 +static inline int smp_suspend(void)
   33.30 +{
   33.31 +	if (num_online_cpus() > 1) {
   33.32 +		printk(KERN_WARNING "Can't suspend SMP guests "
   33.33 +		       "without CONFIG_HOTPLUG_CPU\n");
   33.34 +		return -EOPNOTSUPP;
   33.35 +	}
   33.36 +	return 0;
   33.37 +}
   33.38 +
   33.39 +static inline void smp_resume(void)
   33.40 +{
   33.41 +}
   33.42 +
   33.43 +#endif /* !defined(CONFIG_HOTPLUG_CPU) */
   33.44 +
   33.45 +#endif /* __XEN_CPU_HOTPLUG_H__ */
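
The stubs above let callers stay unconditional: with CONFIG_HOTPLUG_CPU the real smp_suspend()/smp_resume() are linked in, and without it the inline fallback refuses to suspend an SMP guest. A sketch of such a caller (illustrative only, not part of this changeset):

    /* Illustrative suspend path built on the helpers declared above. */
    static int do_suspend(void)
    {
        int err = smp_suspend();
        if (err)
            return err;  /* -EOPNOTSUPP on SMP without CONFIG_HOTPLUG_CPU */

        /* ... save device state and suspend the domain here ... */

        smp_resume();
        return 0;
    }
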
    34.1 --- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h	Thu May 25 15:59:18 2006 -0600
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,48 +0,0 @@
    34.4 -/*****************************************************************************
    34.5 - *
    34.6 - * Utility functions for Xen network devices.
    34.7 - *
    34.8 - * Copyright (c) 2005 XenSource Ltd.
    34.9 - * 
   34.10 - * This program is free software; you can redistribute it and/or
   34.11 - * modify it under the terms of the GNU General Public License version 2
   34.12 - * as published by the Free Software Foundation; or, when distributed
   34.13 - * separately from the Linux kernel or incorporated into other
   34.14 - * software packages, subject to the following license:
   34.15 - * 
   34.16 - * Permission is hereby granted, free of charge, to any person obtaining a
   34.17 - * copy of this source file (the "Software"), to deal in the Software without
   34.18 - * restriction, including without limitation the rights to use, copy, modify,
   34.19 - * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   34.20 - * and to permit persons to whom the Software is furnished to do so, subject
   34.21 - * to the following conditions:
   34.22 - * 
   34.23 - * The above copyright notice and this permission notice shall be included in
   34.24 - * all copies or substantial portions of the Software.
   34.25 - * 
   34.26 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   34.27 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   34.28 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   34.29 - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   34.30 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   34.31 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   34.32 - * DEALINGS IN THE SOFTWARE.
   34.33 - */
   34.34 -
   34.35 -#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
   34.36 -#define _ASM_XEN_NET_DRIVER_UTIL_H
   34.37 -
   34.38 -
   34.39 -#include <xen/xenbus.h>
   34.40 -
   34.41 -
   34.42 -/**
   34.43 - * Read the 'mac' node at the given device's node in the store, and parse that
   34.44 - * as colon-separated octets, placing result the given mac array.  mac must be
   34.45 - * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
   34.46 - * Return 0 on success, or -errno on error.
   34.47 - */
   34.48 -int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
   34.49 -
   34.50 -
   34.51 -#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
    35.1 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h	Thu May 25 15:59:18 2006 -0600
    35.2 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h	Fri May 26 13:41:49 2006 -0600
    35.3 @@ -75,7 +75,7 @@ struct xenbus_device {
    35.4  	int otherend_id;
    35.5  	struct xenbus_watch otherend_watch;
    35.6  	struct device dev;
    35.7 -	XenbusState state;
    35.8 +	enum xenbus_state state;
    35.9  	void *data;
   35.10  };
   35.11  
   35.12 @@ -98,7 +98,7 @@ struct xenbus_driver {
   35.13  	int (*probe)(struct xenbus_device *dev,
   35.14  		     const struct xenbus_device_id *id);
   35.15  	void (*otherend_changed)(struct xenbus_device *dev,
   35.16 -				 XenbusState backend_state);
   35.17 +				 enum xenbus_state backend_state);
   35.18  	int (*remove)(struct xenbus_device *dev);
   35.19  	int (*suspend)(struct xenbus_device *dev);
   35.20  	int (*resume)(struct xenbus_device *dev);
   35.21 @@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
   35.22   * Return 0 on success, or -errno on error.  On error, the device will switch
   35.23   * to XenbusStateClosing, and the error will be saved in the store.
   35.24   */
   35.25 -int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
   35.26 +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
   35.27  
   35.28  
   35.29  /**
   35.30 @@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
   35.31   * Return the state of the driver rooted at the given store path, or
   35.32   * XenbusStateClosed if no state can be read.
   35.33   */
   35.34 -XenbusState xenbus_read_driver_state(const char *path);
   35.35 +enum xenbus_state xenbus_read_driver_state(const char *path);
   35.36  
   35.37  
   35.38  /***
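
Replacing the XenbusState typedef with the plain enum xenbus_state tag touches every signature above without changing the state values themselves. A driver callback under the new signatures might look like this (hypothetical driver, sketch only):

    /* Hypothetical otherend_changed handler using the enum tag directly. */
    static void frontend_changed(struct xenbus_device *dev,
                                 enum xenbus_state frontend_state)
    {
        switch (frontend_state) {
        case XenbusStateConnected:
            xenbus_switch_state(dev, XenbusStateConnected);
            break;
        case XenbusStateClosing:
            xenbus_switch_state(dev, XenbusStateClosing);
            break;
        default:
            break;
        }
    }
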
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch	Fri May 26 13:41:49 2006 -0600
    36.3 @@ -0,0 +1,18 @@
    36.4 +diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
    36.5 +--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c	2006-05-02 22:38:44.000000000 +0100
    36.6 ++++ ./drivers/ide/ide-lib.c	2006-05-24 18:37:05.000000000 +0100
    36.7 +@@ -410,10 +410,10 @@
    36.8 + {
    36.9 + 	u64 addr = BLK_BOUNCE_HIGH;	/* dma64_addr_t */
   36.10 + 
   36.11 +-	if (!PCI_DMA_BUS_IS_PHYS) {
   36.12 +-		addr = BLK_BOUNCE_ANY;
   36.13 +-	} else if (on && drive->media == ide_disk) {
   36.14 +-		if (HWIF(drive)->pci_dev)
   36.15 ++	if (on && drive->media == ide_disk) {
   36.16 ++		if (!PCI_DMA_BUS_IS_PHYS)
   36.17 ++			addr = BLK_BOUNCE_ANY;
   36.18 ++		else if (HWIF(drive)->pci_dev)
   36.19 + 			addr = HWIF(drive)->pci_dev->dma_mask;
   36.20 + 	}
   36.21 + 
    37.1 --- a/tools/libxc/Makefile	Thu May 25 15:59:18 2006 -0600
    37.2 +++ b/tools/libxc/Makefile	Fri May 26 13:41:49 2006 -0600
    37.3 @@ -20,6 +20,7 @@ SRCS       += xc_acm.c
    37.4  SRCS       += xc_physdev.c
    37.5  SRCS       += xc_private.c
    37.6  SRCS       += xc_sedf.c
    37.7 +SRCS       += xc_csched.c
    37.8  SRCS       += xc_tbuf.c
    37.9  
   37.10  ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/tools/libxc/xc_csched.c	Fri May 26 13:41:49 2006 -0600
    38.3 @@ -0,0 +1,50 @@
    38.4 +/****************************************************************************
    38.5 + * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
    38.6 + ****************************************************************************
    38.7 + *
    38.8 + *        File: xc_csched.c
    38.9 + *      Author: Emmanuel Ackaouy
   38.10 + *
   38.11 + * Description: XC Interface to the credit scheduler
   38.12 + *
   38.13 + */
   38.14 +#include "xc_private.h"
   38.15 +
   38.16 +
   38.17 +int
   38.18 +xc_csched_domain_set(
   38.19 +    int xc_handle,
   38.20 +    uint32_t domid,
   38.21 +    struct csched_domain *sdom)
   38.22 +{
   38.23 +    DECLARE_DOM0_OP;
   38.24 +
   38.25 +    op.cmd = DOM0_ADJUSTDOM;    
   38.26 +    op.u.adjustdom.domain = (domid_t) domid;
   38.27 +    op.u.adjustdom.sched_id = SCHED_CREDIT;
   38.28 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
   38.29 +    op.u.adjustdom.u.credit = *sdom;
   38.30 +
   38.31 +    return do_dom0_op(xc_handle, &op);
   38.32 +}
   38.33 +
   38.34 +int
   38.35 +xc_csched_domain_get(
   38.36 +    int xc_handle,
   38.37 +    uint32_t domid,
   38.38 +    struct csched_domain *sdom)
   38.39 +{
   38.40 +    DECLARE_DOM0_OP;
   38.41 +    int err;
   38.42 +
   38.43 +    op.cmd = DOM0_ADJUSTDOM;    
   38.44 +    op.u.adjustdom.domain = (domid_t) domid;
   38.45 +    op.u.adjustdom.sched_id = SCHED_CREDIT;
   38.46 +    op.u.adjustdom.direction = SCHED_INFO_GET;
   38.47 +
   38.48 +    err = do_dom0_op(xc_handle, &op);
   38.49 +    if ( err == 0 )
   38.50 +        *sdom = op.u.adjustdom.u.credit;
   38.51 +
   38.52 +    return err;
   38.53 +}
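
The two wrappers above are symmetric around the same DOM0_ADJUSTDOM op, so read-modify-write is the natural usage pattern. A minimal sketch (hypothetical helper, not part of this changeset):

    /* Hypothetical helper: double a domain's credit-scheduler weight,
     * leaving its cap untouched. */
    static int csched_double_weight(int xc_handle, uint32_t domid)
    {
        struct csched_domain sdom;
        int err;

        err = xc_csched_domain_get(xc_handle, domid, &sdom);
        if ( err != 0 )
            return err;
        sdom.weight *= 2;
        return xc_csched_domain_set(xc_handle, domid, &sdom);
    }
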
    39.1 --- a/tools/libxc/xc_linux_build.c	Thu May 25 15:59:18 2006 -0600
    39.2 +++ b/tools/libxc/xc_linux_build.c	Fri May 26 13:41:49 2006 -0600
    39.3 @@ -268,22 +268,11 @@ static int setup_pg_tables_pae(int xc_ha
    39.4      l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
    39.5      l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
    39.6      uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
    39.7 -    unsigned long ppt_alloc, count, nmfn;
    39.8 +    unsigned long ppt_alloc, count;
    39.9  
   39.10      /* First allocate page for page dir. */
   39.11      ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
   39.12  
   39.13 -    if ( page_array[ppt_alloc] > 0xfffff )
   39.14 -    {
   39.15 -        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
   39.16 -        if ( nmfn == 0 )
   39.17 -        {
   39.18 -            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
   39.19 -            goto error_out;
   39.20 -        }
   39.21 -        page_array[ppt_alloc] = nmfn;
   39.22 -    }
   39.23 -
   39.24      alloc_pt(l3tab, vl3tab, pl3tab);
   39.25      vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
   39.26      if (shadow_mode_enabled)
    40.1 --- a/tools/libxc/xc_linux_restore.c	Thu May 25 15:59:18 2006 -0600
    40.2 +++ b/tools/libxc/xc_linux_restore.c	Fri May 26 13:41:49 2006 -0600
    40.3 @@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int 
    40.4                  ** A page table page - need to 'uncanonicalize' it, i.e.
    40.5                  ** replace all the references to pfns with the corresponding
    40.6                  ** mfns for the new domain.
    40.7 -                **
    40.8 -                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
    40.9 -                ** so we may need to update the p2m after the main loop.
   40.10 -                ** Hence we defer canonicalization of L1s until then.
   40.11                  */
   40.12 -                if(pt_levels != 3 || pagetype != L1TAB) {
   40.13 -
   40.14 -                    if(!uncanonicalize_pagetable(pagetype, page)) {
   40.15 -                        /*
   40.16 -                        ** Failing to uncanonicalize a page table can be ok
   40.17 -                        ** under live migration since the pages type may have
   40.18 -                        ** changed by now (and we'll get an update later).
   40.19 -                        */
   40.20 -                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
   40.21 -                                pagetype >> 28, pfn, mfn);
   40.22 -                        nraces++;
   40.23 -                        continue;
   40.24 -                    }
   40.25 -
   40.26 +                if(!uncanonicalize_pagetable(pagetype, page)) {
   40.27 +                    /*
   40.28 +                    ** Failing to uncanonicalize a page table can be ok
   40.29 +                    ** under live migration since the page's type may have
   40.30 +                    ** changed by now (and we'll get an update later).
   40.31 +                    */
   40.32 +                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
   40.33 +                            pagetype >> 28, pfn, mfn);
   40.34 +                    nraces++;
   40.35 +                    continue;
   40.36                  }
   40.37  
   40.38              } else if(pagetype != NOTAB) {
   40.39 @@ -398,100 +390,6 @@ int xc_linux_restore(int xc_handle, int 
   40.40  
   40.41      DPRINTF("Received all pages (%d races)\n", nraces);
   40.42  
   40.43 -    if(pt_levels == 3) {
   40.44 -
   40.45 -        /*
   40.46 -        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
   40.47 -        ** is a little awkward and involves (a) finding all such PGDs and
   40.48 -        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
   40.49 -        ** with the new info; and (c) canonicalizing all the L1s using the
   40.50 -        ** (potentially updated) p2m[].
   40.51 -        **
   40.52 -        ** This is relatively slow (and currently involves two passes through
   40.53 -        ** the pfn_type[] array), but at least seems to be correct. May wish
   40.54 -        ** to consider more complex approaches to optimize this later.
   40.55 -        */
   40.56 -
   40.57 -        int j, k;
   40.58 -
   40.59 -        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
   40.60 -        for (i = 0; i < max_pfn; i++) {
   40.61 -
   40.62 -            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
   40.63 -
   40.64 -                unsigned long new_mfn;
   40.65 -                uint64_t l3ptes[4];
   40.66 -                uint64_t *l3tab;
   40.67 -
   40.68 -                l3tab = (uint64_t *)
   40.69 -                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   40.70 -                                         PROT_READ, p2m[i]);
   40.71 -
   40.72 -                for(j = 0; j < 4; j++)
   40.73 -                    l3ptes[j] = l3tab[j];
   40.74 -
   40.75 -                munmap(l3tab, PAGE_SIZE);
   40.76 -
   40.77 -                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
   40.78 -                    ERR("Couldn't get a page below 4GB :-(");
   40.79 -                    goto out;
   40.80 -                }
   40.81 -
   40.82 -                p2m[i] = new_mfn;
   40.83 -                if (xc_add_mmu_update(xc_handle, mmu,
   40.84 -                                      (((unsigned long long)new_mfn)
   40.85 -                                       << PAGE_SHIFT) |
   40.86 -                                      MMU_MACHPHYS_UPDATE, i)) {
   40.87 -                    ERR("Couldn't m2p on PAE root pgdir");
   40.88 -                    goto out;
   40.89 -                }
   40.90 -
   40.91 -                l3tab = (uint64_t *)
   40.92 -                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
   40.93 -                                         PROT_READ | PROT_WRITE, p2m[i]);
   40.94 -
   40.95 -                for(j = 0; j < 4; j++)
   40.96 -                    l3tab[j] = l3ptes[j];
   40.97 -
   40.98 -                munmap(l3tab, PAGE_SIZE);
   40.99 -
  40.100 -            }
  40.101 -        }
  40.102 -
  40.103 -        /* Second pass: find all L1TABs and uncanonicalize them */
  40.104 -        j = 0;
  40.105 -
  40.106 -        for(i = 0; i < max_pfn; i++) {
  40.107 -
  40.108 -            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
  40.109 -                region_mfn[j] = p2m[i];
  40.110 -                j++;
  40.111 -            }
  40.112 -
  40.113 -            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
  40.114 -
  40.115 -                if (!(region_base = xc_map_foreign_batch(
  40.116 -                          xc_handle, dom, PROT_READ | PROT_WRITE,
  40.117 -                          region_mfn, j))) {
  40.118 -                    ERR("map batch failed");
  40.119 -                    goto out;
  40.120 -                }
  40.121 -
  40.122 -                for(k = 0; k < j; k++) {
  40.123 -                    if(!uncanonicalize_pagetable(L1TAB,
  40.124 -                                                 region_base + k*PAGE_SIZE)) {
  40.125 -                        ERR("failed uncanonicalize pt!");
  40.126 -                        goto out;
  40.127 -                    }
  40.128 -                }
  40.129 -
  40.130 -                munmap(region_base, j*PAGE_SIZE);
  40.131 -                j = 0;
  40.132 -            }
  40.133 -        }
  40.134 -
  40.135 -    }
  40.136 -
  40.137  
  40.138      if (xc_finish_mmu_updates(xc_handle, mmu)) {
  40.139          ERR("Error doing finish_mmu_updates()");
    41.1 --- a/tools/libxc/xc_private.c	Thu May 25 15:59:18 2006 -0600
    41.2 +++ b/tools/libxc/xc_private.c	Fri May 26 13:41:49 2006 -0600
    41.3 @@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
    41.4      return rc;
    41.5  }
    41.6  
    41.7 -unsigned long xc_make_page_below_4G(
    41.8 -    int xc_handle, uint32_t domid, unsigned long mfn)
    41.9 -{
   41.10 -    unsigned long new_mfn;
   41.11 -
   41.12 -    if ( xc_domain_memory_decrease_reservation(
   41.13 -        xc_handle, domid, 1, 0, &mfn) != 0 )
   41.14 -    {
   41.15 -        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
   41.16 -        return 0;
   41.17 -    }
   41.18 -
   41.19 -    if ( xc_domain_memory_increase_reservation(
   41.20 -        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
   41.21 -    {
   41.22 -        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
   41.23 -        return 0;
   41.24 -    }
   41.25 -
   41.26 -    return new_mfn;
   41.27 -}
   41.28 -
   41.29  /*
   41.30   * Local variables:
   41.31   * mode: C
    42.1 --- a/tools/libxc/xc_ptrace.c	Thu May 25 15:59:18 2006 -0600
    42.2 +++ b/tools/libxc/xc_ptrace.c	Fri May 26 13:41:49 2006 -0600
    42.3 @@ -185,7 +185,7 @@ map_domain_va_32(
    42.4      void *guest_va,
    42.5      int perm)
    42.6  {
    42.7 -    unsigned long l1p, p, va = (unsigned long)guest_va;
    42.8 +    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
    42.9      uint32_t *l2, *l1;
   42.10      static void *v[MAX_VIRT_CPUS];
   42.11  
   42.12 @@ -194,18 +194,20 @@ map_domain_va_32(
   42.13      if ( l2 == NULL )
   42.14          return NULL;
   42.15  
   42.16 -    l1p = to_ma(cpu, l2[l2_table_offset(va)]);
   42.17 +    l2e = l2[l2_table_offset_i386(va)];
   42.18      munmap(l2, PAGE_SIZE);
   42.19 -    if ( !(l1p & _PAGE_PRESENT) )
   42.20 +    if ( !(l2e & _PAGE_PRESENT) )
   42.21          return NULL;
   42.22 +    l1p = to_ma(cpu, l2e);
   42.23      l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT);
   42.24      if ( l1 == NULL )
   42.25          return NULL;
   42.26  
   42.27 -    p = to_ma(cpu, l1[l1_table_offset(va)]);
   42.28 +    l1e = l1[l1_table_offset_i386(va)];
   42.29      munmap(l1, PAGE_SIZE);
   42.30 -    if ( !(p & _PAGE_PRESENT) )
   42.31 +    if ( !(l1e & _PAGE_PRESENT) )
   42.32          return NULL;
   42.33 +    p = to_ma(cpu, l1e);
   42.34      if ( v[cpu] != NULL )
   42.35          munmap(v[cpu], PAGE_SIZE);
   42.36      v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
   42.37 @@ -223,7 +225,7 @@ map_domain_va_pae(
   42.38      void *guest_va,
   42.39      int perm)
   42.40  {
   42.41 -    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
   42.42 +    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
   42.43      uint64_t *l3, *l2, *l1;
   42.44      static void *v[MAX_VIRT_CPUS];
   42.45  
   42.46 @@ -232,26 +234,29 @@ map_domain_va_pae(
   42.47      if ( l3 == NULL )
   42.48          return NULL;
   42.49  
   42.50 -    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
   42.51 +    l3e = l3[l3_table_offset_pae(va)];
   42.52      munmap(l3, PAGE_SIZE);
   42.53 -    if ( !(l2p & _PAGE_PRESENT) )
   42.54 +    if ( !(l3e & _PAGE_PRESENT) )
   42.55          return NULL;
   42.56 +    l2p = to_ma(cpu, l3e);
   42.57      l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
   42.58      if ( l2 == NULL )
   42.59          return NULL;
   42.60  
   42.61 -    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
   42.62 +    l2e = l2[l2_table_offset_pae(va)];
   42.63      munmap(l2, PAGE_SIZE);
   42.64 -    if ( !(l1p & _PAGE_PRESENT) )
   42.65 +    if ( !(l2e & _PAGE_PRESENT) )
   42.66          return NULL;
   42.67 +    l1p = to_ma(cpu, l2e);
   42.68      l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT);
   42.69      if ( l1 == NULL )
   42.70          return NULL;
   42.71  
   42.72 -    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
   42.73 +    l1e = l1[l1_table_offset_pae(va)];
   42.74      munmap(l1, PAGE_SIZE);
   42.75 -    if ( !(p & _PAGE_PRESENT) )
   42.76 +    if ( !(l1e & _PAGE_PRESENT) )
   42.77          return NULL;
   42.78 +    p = to_ma(cpu, l1e);
   42.79      if ( v[cpu] != NULL )
   42.80          munmap(v[cpu], PAGE_SIZE);
   42.81      v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
   42.82 @@ -269,10 +274,11 @@ map_domain_va_64(
   42.83      void *guest_va,
   42.84      int perm)
   42.85  {
   42.86 -    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
   42.87 +    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va;
   42.88      uint64_t *l4, *l3, *l2, *l1;
   42.89      static void *v[MAX_VIRT_CPUS];
   42.90  
   42.91 +
   42.92      if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
   42.93          return map_domain_va_32(xc_handle, cpu, guest_va, perm);
   42.94  
   42.95 @@ -281,40 +287,41 @@ map_domain_va_64(
   42.96      if ( l4 == NULL )
   42.97          return NULL;
   42.98  
   42.99 -    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
  42.100 +    l4e = l4[l4_table_offset(va)];
  42.101      munmap(l4, PAGE_SIZE);
  42.102 -    if ( !(l3p & _PAGE_PRESENT) )
  42.103 +    if ( !(l4e & _PAGE_PRESENT) )
  42.104          return NULL;
  42.105 +    l3p = to_ma(cpu, l4e);
  42.106      l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p >> PAGE_SHIFT);
  42.107      if ( l3 == NULL )
  42.108          return NULL;
  42.109  
  42.110 -    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
  42.111 +    l3e = l3[l3_table_offset(va)];
  42.112      munmap(l3, PAGE_SIZE);
  42.113 -    if ( !(l2p & _PAGE_PRESENT) )
  42.114 +    if ( !(l3e & _PAGE_PRESENT) )
  42.115          return NULL;
  42.116 +    l2p = to_ma(cpu, l3e);
  42.117      l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
  42.118      if ( l2 == NULL )
  42.119          return NULL;
  42.120  
  42.121      l1 = NULL;
  42.122 -    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
  42.123 -    if ( !(l1e & _PAGE_PRESENT) )
  42.124 -    {
  42.125 -        munmap(l2, PAGE_SIZE);
  42.126 +    l2e = l2[l2_table_offset(va)];
  42.127 +    munmap(l2, PAGE_SIZE);
  42.128 +    if ( !(l2e & _PAGE_PRESENT) )
  42.129          return NULL;
  42.130 -    }
  42.131 -    l1p = l1e >> PAGE_SHIFT;
  42.132 -    if (l1e & 0x80)  { /* 2M pages */
  42.133 +    l1p = to_ma(cpu, l2e);
  42.134 +    if (l2e & 0x80)  { /* 2M pages */
  42.135          p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
  42.136      } else { /* 4K pages */
  42.137 -        l1p = to_ma(cpu, l1p);
  42.138          l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT);
  42.139 -        munmap(l2, PAGE_SIZE);
  42.140          if ( l1 == NULL )
  42.141              return NULL;
  42.142  
  42.143 -        p = to_ma(cpu, l1[l1_table_offset(va)]);
  42.144 +        l1e = l1[l1_table_offset(va)];
  42.145 +        if ( !(l1e & _PAGE_PRESENT) )
  42.146 +            return NULL;
  42.147 +        p = to_ma(cpu, l1e);
  42.148      }
  42.149      if ( v[cpu] != NULL )
  42.150          munmap(v[cpu], PAGE_SIZE);
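
All three map_domain_va_* variants above now follow one per-level pattern: test _PAGE_PRESENT on the raw guest entry first, and only then derive a machine address with to_ma(). Distilled (sketch only; the real functions also map and unmap each table level):

    /* The per-level walk step after this change, in miniature. */
    static int walk_one_level(int cpu, uint64_t entry, unsigned long *ma)
    {
        if ( !(entry & _PAGE_PRESENT) )  /* present bit checked on the raw entry */
            return 0;
        *ma = to_ma(cpu, entry);         /* translation happens only afterwards */
        return 1;
    }
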
    43.1 --- a/tools/libxc/xc_ptrace.h	Thu May 25 15:59:18 2006 -0600
    43.2 +++ b/tools/libxc/xc_ptrace.h	Fri May 26 13:41:49 2006 -0600
    43.3 @@ -7,7 +7,6 @@
    43.4  #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
    43.5  #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
    43.6  #define BSD_PAGE_MASK (PAGE_SIZE-1)
    43.7 -#define PDRSHIFT        22
    43.8  #define PSL_T  0x00000100 /* trace enable bit */
    43.9  
   43.10  #ifdef __x86_64__
   43.11 @@ -162,8 +161,6 @@ struct gdb_regs {
   43.12  #endif
   43.13  
   43.14  #define printval(x) printf("%s = %lx\n", #x, (long)x);
   43.15 -#define vtopdi(va) ((va) >> PDRSHIFT)
   43.16 -#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
   43.17  #endif
   43.18  
   43.19  typedef void (*thr_ev_handler_t)(long);
    44.1 --- a/tools/libxc/xc_ptrace_core.c	Thu May 25 15:59:18 2006 -0600
    44.2 +++ b/tools/libxc/xc_ptrace_core.c	Fri May 26 13:41:49 2006 -0600
    44.3 @@ -3,6 +3,7 @@
    44.4  #include <sys/ptrace.h>
    44.5  #include <sys/wait.h>
    44.6  #include "xc_private.h"
    44.7 +#include "xg_private.h"
    44.8  #include "xc_ptrace.h"
    44.9  #include <time.h>
   44.10  
   44.11 @@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, 
   44.12          }
   44.13          cr3_virt[cpu] = v;
   44.14      }
   44.15 -    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
   44.16 +    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical address */
   44.17          return NULL;
   44.18      if (ctxt[cpu].flags & VGCF_HVM_GUEST)
   44.19          pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
   44.20 @@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, 
   44.21              return NULL;
   44.22          pde_virt[cpu] = v;
   44.23      }
   44.24 -    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
   44.25 +    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical address */
   44.26          return NULL;
   44.27      if (ctxt[cpu].flags & VGCF_HVM_GUEST)
   44.28          page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
   44.29 @@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, 
   44.30              map_mtop_offset(page_phys[cpu]));
   44.31          if (v == MAP_FAILED)
   44.32          {
   44.33 -            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
   44.34 +            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, l1_table_offset_i386(va));
   44.35              page_phys[cpu] = 0;
   44.36              return NULL;
   44.37          }
    45.1 --- a/tools/libxc/xc_tbuf.c	Thu May 25 15:59:18 2006 -0600
    45.2 +++ b/tools/libxc/xc_tbuf.c	Fri May 26 13:41:49 2006 -0600
    45.3 @@ -18,53 +18,57 @@
    45.4  
    45.5  static int tbuf_enable(int xc_handle, int enable)
    45.6  {
    45.7 -  DECLARE_DOM0_OP;
    45.8 +    DECLARE_DOM0_OP;
    45.9  
   45.10 -  op.cmd = DOM0_TBUFCONTROL;
   45.11 -  op.interface_version = DOM0_INTERFACE_VERSION;
   45.12 -  if (enable)
   45.13 -    op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
   45.14 -  else
   45.15 -    op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
   45.16 +    op.cmd = DOM0_TBUFCONTROL;
   45.17 +    op.interface_version = DOM0_INTERFACE_VERSION;
   45.18 +    if (enable)
   45.19 +        op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
   45.20 +    else
   45.21 +        op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
   45.22  
   45.23 -  return xc_dom0_op(xc_handle, &op);
   45.24 +    return xc_dom0_op(xc_handle, &op);
   45.25  }
   45.26  
   45.27  int xc_tbuf_set_size(int xc_handle, unsigned long size)
   45.28  {
   45.29 -  DECLARE_DOM0_OP;
   45.30 +    DECLARE_DOM0_OP;
   45.31  
   45.32 -  op.cmd = DOM0_TBUFCONTROL;
   45.33 -  op.interface_version = DOM0_INTERFACE_VERSION;
   45.34 -  op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
   45.35 -  op.u.tbufcontrol.size = size;
   45.36 +    op.cmd = DOM0_TBUFCONTROL;
   45.37 +    op.interface_version = DOM0_INTERFACE_VERSION;
   45.38 +    op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
   45.39 +    op.u.tbufcontrol.size = size;
   45.40  
   45.41 -  return xc_dom0_op(xc_handle, &op);
   45.42 +    return xc_dom0_op(xc_handle, &op);
   45.43  }
   45.44  
   45.45  int xc_tbuf_get_size(int xc_handle, unsigned long *size)
   45.46  {
   45.47 -  int rc;
   45.48 -  DECLARE_DOM0_OP;
   45.49 +    int rc;
   45.50 +    DECLARE_DOM0_OP;
   45.51  
   45.52 -  op.cmd = DOM0_TBUFCONTROL;
   45.53 -  op.interface_version = DOM0_INTERFACE_VERSION;
   45.54 -  op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
   45.55 +    op.cmd = DOM0_TBUFCONTROL;
   45.56 +    op.interface_version = DOM0_INTERFACE_VERSION;
   45.57 +    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
   45.58  
   45.59 -  rc = xc_dom0_op(xc_handle, &op);
   45.60 -  if (rc == 0)
   45.61 -    *size = op.u.tbufcontrol.size;
   45.62 -  return rc;
   45.63 +    rc = xc_dom0_op(xc_handle, &op);
   45.64 +    if (rc == 0)
   45.65 +        *size = op.u.tbufcontrol.size;
   45.66 +    return rc;
   45.67  }
   45.68  
   45.69  int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
   45.70 -    unsigned long *size)
   45.71 +                   unsigned long *size)
   45.72  {
   45.73      DECLARE_DOM0_OP;
   45.74      int rc;
   45.75  
   45.76 -    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
   45.77 -        return -1;
   45.78 +    /*
   45.79 +     * Ignore errors (at least for now) as we get an error if size is already
   45.80 +     * set (since trace buffers cannot be reallocated). If we really have no
   45.81 +     * buffers at all then tbuf_enable() will fail, so this is safe.
   45.82 +     */
   45.83 +    (void)xc_tbuf_set_size(xc_handle, cnt);
   45.84  
   45.85      if ( tbuf_enable(xc_handle, 1) != 0 )
   45.86          return -1;
    46.1 --- a/tools/libxc/xenctrl.h	Thu May 25 15:59:18 2006 -0600
    46.2 +++ b/tools/libxc/xenctrl.h	Fri May 26 13:41:49 2006 -0600
    46.3 @@ -354,6 +354,14 @@ int xc_sedf_domain_get(int xc_handle,
    46.4                         uint64_t *latency, uint16_t *extratime,
    46.5                         uint16_t *weight);
    46.6  
    46.7 +int xc_csched_domain_set(int xc_handle,
    46.8 +                         uint32_t domid,
    46.9 +                         struct csched_domain *sdom);
   46.10 +
   46.11 +int xc_csched_domain_get(int xc_handle,
   46.12 +                         uint32_t domid,
   46.13 +                         struct csched_domain *sdom);
   46.14 +
   46.15  typedef evtchn_status_t xc_evtchn_status_t;
   46.16  
   46.17  /*
   46.18 @@ -445,9 +453,6 @@ int xc_domain_iomem_permission(int xc_ha
   46.19                                 unsigned long nr_mfns,
   46.20                                 uint8_t allow_access);
   46.21  
   46.22 -unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
   46.23 -                                    unsigned long mfn);
   46.24 -
   46.25  typedef dom0_perfc_desc_t xc_perfc_desc_t;
   46.26  /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
   46.27  int xc_perfc_control(int xc_handle,
    47.1 --- a/tools/libxc/xg_private.h	Thu May 25 15:59:18 2006 -0600
    47.2 +++ b/tools/libxc/xg_private.h	Fri May 26 13:41:49 2006 -0600
    47.3 @@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
    47.4  #define L2_PAGETABLE_SHIFT_PAE   21
    47.5  #define L3_PAGETABLE_SHIFT_PAE   30
    47.6  
    47.7 +#define L2_PAGETABLE_SHIFT_I386  22
    47.8 +
    47.9  #if defined(__i386__)
   47.10  #define L1_PAGETABLE_SHIFT       12
   47.11  #define L2_PAGETABLE_SHIFT       22
   47.12 @@ -62,6 +64,9 @@ unsigned long csum_page (void * page);
   47.13  #define L2_PAGETABLE_ENTRIES_PAE  512
   47.14  #define L3_PAGETABLE_ENTRIES_PAE    4
   47.15  
   47.16 +#define L1_PAGETABLE_ENTRIES_I386 1024
   47.17 +#define L2_PAGETABLE_ENTRIES_I386 1024
   47.18 +
   47.19  #if defined(__i386__)
   47.20  #define L1_PAGETABLE_ENTRIES   1024
   47.21  #define L2_PAGETABLE_ENTRIES   1024
   47.22 @@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
   47.23  #define l3_table_offset_pae(_a) \
   47.24    (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
   47.25  
   47.26 +#define l1_table_offset_i386(_a) \
   47.27 +  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
   47.28 +#define l2_table_offset_i386(_a) \
   47.29 +  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
   47.30 +
   47.31  #if defined(__i386__)
   47.32  #define l1_table_offset(_a) \
   47.33            (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
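
The new *_I386 constants describe the 2-level, 1024-entry i386 layout independently of the build architecture, so 32-bit guest page tables can be decoded even from a 64-bit tools build. A worked example (illustrative only; assumes <stdio.h>):

    /* Split a 32-bit guest VA into its 2-level page-table indices. */
    static void split_va_i386(unsigned long va)
    {
        printf("va %#lx: l2 index %#lx, l1 index %#lx\n", va,
               l2_table_offset_i386(va),   /* 0xC0101234 >> 22 == 0x300 */
               l1_table_offset_i386(va));  /* (0xC0101234 >> 12) & 0x3ff == 0x101 */
    }
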
    48.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Thu May 25 15:59:18 2006 -0600
    48.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Fri May 26 13:41:49 2006 -0600
    48.3 @@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
    48.4                           "weight",    weight);
    48.5  }
    48.6  
    48.7 +static PyObject *pyxc_csched_domain_set(XcObject *self,
    48.8 +                                        PyObject *args,
    48.9 +                                        PyObject *kwds)
   48.10 +{
   48.11 +    uint32_t domid;
   48.12 +    uint16_t weight;
   48.13 +    uint16_t cap;
   48.14 +    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
   48.15 +    static char kwd_type[] = "I|HH";
   48.16 +    struct csched_domain sdom;
   48.17 +    
   48.18 +    weight = 0;
   48.19 +    cap = (uint16_t)~0U;
   48.20 +    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
   48.21 +                                     &domid, &weight, &cap) )
   48.22 +        return NULL;
   48.23 +
   48.24 +    sdom.weight = weight;
   48.25 +    sdom.cap = cap;
   48.26 +
   48.27 +    if ( xc_csched_domain_set(self->xc_handle, domid, &sdom) != 0 )
   48.28 +        return PyErr_SetFromErrno(xc_error);
   48.29 +
   48.30 +    Py_INCREF(zero);
   48.31 +    return zero;
   48.32 +}
   48.33 +
   48.34 +static PyObject *pyxc_csched_domain_get(XcObject *self, PyObject *args)
   48.35 +{
   48.36 +    uint32_t domid;
   48.37 +    struct csched_domain sdom;
   48.38 +    
   48.39 +    if( !PyArg_ParseTuple(args, "I", &domid) )
   48.40 +        return NULL;
   48.41 +    
   48.42 +    if ( xc_csched_domain_get(self->xc_handle, domid, &sdom) != 0 )
   48.43 +        return PyErr_SetFromErrno(xc_error);
   48.44 +
   48.45 +    return Py_BuildValue("{s:H,s:H}",
   48.46 +                         "weight",  sdom.weight,
   48.47 +                         "cap",     sdom.cap);
   48.48 +}
   48.49 +
   48.50  static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
   48.51  {
   48.52      uint32_t dom;
   48.53 @@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
   48.54        " slice     [long]: CPU reservation per period\n"
   48.55        " latency   [long]: domain's wakeup latency hint\n"
   48.56        " extratime [int]:  domain aware of extratime?\n"},
   48.57 +    
   48.58 +    { "csched_domain_set",
   48.59 +      (PyCFunction)pyxc_csched_domain_set,
   48.60 +      METH_KEYWORDS, "\n"
   48.61 +      "Set the scheduling parameters for a domain when running with the\n"
   48.62 +      "SMP credit scheduler.\n"
   48.63 +      " domid     [int]:   domain id to set\n"
   48.64 +      " weight    [short]: domain's scheduling weight\n"
   48.65 +      "Returns: [int] 0 on success; -1 on error.\n" },
   48.66 +
   48.67 +    { "csched_domain_get",
   48.68 +      (PyCFunction)pyxc_csched_domain_get,
   48.69 +      METH_VARARGS, "\n"
   48.70 +      "Get the scheduling parameters for a domain when running with the\n"
   48.71 +      "SMP credit scheduler.\n"
   48.72 +      " domid     [int]:   domain id to get\n"
   48.73 +      "Returns:   [dict]\n"
   48.74 +      " weight    [short]: domain's scheduling weight\n"},
   48.75  
   48.76      { "evtchn_alloc_unbound", 
   48.77        (PyCFunction)pyxc_evtchn_alloc_unbound,
    49.1 --- a/tools/python/xen/lowlevel/xs/xs.c	Thu May 25 15:59:18 2006 -0600
    49.2 +++ b/tools/python/xen/lowlevel/xs/xs.c	Fri May 26 13:41:49 2006 -0600
    49.3 @@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
    49.4  
    49.5      if (perms) {
    49.6          PyObject *val = PyList_New(perms_n);
    49.7 -        for (i = 0; i < perms_n; i++, perms++) {
    49.8 -            PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
    49.9 -                                        "dom",  perms->id,
   49.10 -                                        "read", perms->perms & XS_PERM_READ,
   49.11 -                                        "write",perms->perms & XS_PERM_WRITE);
   49.12 +        for (i = 0; i < perms_n; i++) {
   49.13 +            PyObject *p =
   49.14 +                Py_BuildValue("{s:i,s:i,s:i}",
   49.15 +                              "dom",   perms[i].id,
   49.16 +                              "read",  perms[i].perms & XS_PERM_READ,
   49.17 +                              "write", perms[i].perms & XS_PERM_WRITE);
   49.18              PyList_SetItem(val, i, p);
   49.19          }
   49.20  
    50.1 --- a/tools/python/xen/xend/XendDomain.py	Thu May 25 15:59:18 2006 -0600
    50.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri May 26 13:41:49 2006 -0600
    50.3 @@ -522,6 +522,28 @@ class XendDomain:
    50.4          except Exception, ex:
    50.5              raise XendError(str(ex))
    50.6  
    50.7 +    def domain_csched_get(self, domid):
    50.8 +        """Get credit scheduler parameters for a domain.
    50.9 +        """
   50.10 +        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
   50.11 +        if not dominfo:
   50.12 +            raise XendInvalidDomain(str(domid))
   50.13 +        try:
   50.14 +            return xc.csched_domain_get(dominfo.getDomid())
   50.15 +        except Exception, ex:
   50.16 +            raise XendError(str(ex))
   50.17 +    
   50.18 +    def domain_csched_set(self, domid, weight, cap):
   50.19 +        """Set credit scheduler parameters for a domain.
   50.20 +        """
   50.21 +        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
   50.22 +        if not dominfo:
   50.23 +            raise XendInvalidDomain(str(domid))
   50.24 +        try:
   50.25 +            return xc.csched_domain_set(dominfo.getDomid(), weight, cap)
   50.26 +        except Exception, ex:
   50.27 +            raise XendError(str(ex))
   50.28 +
   50.29      def domain_maxmem_set(self, domid, mem):
   50.30          """Set the memory limit for a domain.
   50.31  
    51.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu May 25 15:59:18 2006 -0600
    51.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri May 26 13:41:49 2006 -0600
    51.3 @@ -701,6 +701,16 @@ class XendDomainInfo:
    51.4          log.debug("Storing VM details: %s", to_store)
    51.5  
    51.6          self.writeVm(to_store)
    51.7 +        self.setVmPermissions()
    51.8 +
    51.9 +
   51.10 +    def setVmPermissions(self):
   51.11 +        """Allow the guest domain to read its UUID.  We don't allow it to
   51.12 +        access any other entry, for security."""
   51.13 +        xstransact.SetPermissions('%s/uuid' % self.vmpath,
   51.14 +                                  { 'dom' : self.domid,
   51.15 +                                    'read' : True,
   51.16 +                                    'write' : False })
   51.17  
   51.18  
   51.19      def storeDomDetails(self):
   51.20 @@ -1536,6 +1546,10 @@ class XendDomainInfo:
   51.21          self.configure_bootloader()
   51.22          config = self.sxpr()
   51.23  
   51.24 +        if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
   51.25 +            config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
   51.26 +                                          self.info['cpus'])])
   51.27 +
   51.28          if self.readVm(RESTART_IN_PROGRESS):
   51.29              log.error('Xend failed during restart of domain %d.  '
   51.30                        'Refusing to restart to avoid loops.',
    52.1 --- a/tools/python/xen/xend/balloon.py	Thu May 25 15:59:18 2006 -0600
    52.2 +++ b/tools/python/xen/xend/balloon.py	Fri May 26 13:41:49 2006 -0600
    52.3 @@ -32,6 +32,7 @@ PROC_XEN_BALLOON = '/proc/xen/balloon'
    52.4  BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
    52.5                        # rounded.
    52.6  RETRY_LIMIT = 10
    52.7 +RETRY_LIMIT_INCR = 5
    52.8  ##
    52.9  # The time to sleep between retries grows linearly, using this value (in
   52.10  # seconds).  When the system is lightly loaded, memory should be scrubbed and
   52.11 @@ -118,7 +119,8 @@ def free(required):
   52.12          retries = 0
   52.13          sleep_time = SLEEP_TIME_GROWTH
   52.14          last_new_alloc = None
   52.15 -        while retries < RETRY_LIMIT:
   52.16 +        rlimit = RETRY_LIMIT
   52.17 +        while retries < rlimit:
   52.18              free_mem = xc.physinfo()['free_memory']
   52.19  
   52.20              if free_mem >= need_mem:
   52.21 @@ -127,7 +129,9 @@ def free(required):
   52.22                  return
   52.23  
   52.24              if retries == 0:
   52.25 -                log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
   52.26 +                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
   52.27 +                log.debug("Balloon: free %d; need %d; retries: %d.", 
   52.28 +                          free_mem, need_mem, rlimit)
   52.29  
   52.30              if dom0_min_mem > 0:
   52.31                  dom0_alloc = get_dom0_current_alloc()
   52.32 @@ -143,8 +147,9 @@ def free(required):
   52.33                      # Continue to retry, waiting for ballooning.
   52.34  
   52.35              time.sleep(sleep_time)
   52.36 +            if retries < 2 * RETRY_LIMIT:
   52.37 +                sleep_time += SLEEP_TIME_GROWTH
   52.38              retries += 1
   52.39 -            sleep_time += SLEEP_TIME_GROWTH
   52.40  
   52.41          # Not enough memory; diagnose the problem.
   52.42          if dom0_min_mem == 0:
    53.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Thu May 25 15:59:18 2006 -0600
    53.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Fri May 26 13:41:49 2006 -0600
    53.3 @@ -131,6 +131,20 @@ class SrvDomain(SrvDir):
    53.4  		     ['weight', 'int']])
    53.5          val = fn(req.args, {'dom': self.dom.domid})
    53.6          return val
    53.7 +    
    53.8 +    def op_domain_csched_get(self, _, req):
    53.9 +        fn = FormFn(self.xd.domain_csched_get,
   53.10 +                    [['dom', 'int']])
   53.11 +        val = fn(req.args, {'dom': self.dom.domid})
   53.12 +        return val
   53.13 +
   53.14 +
   53.15 +    def op_domain_csched_set(self, _, req):
   53.16 +        fn = FormFn(self.xd.domain_csched_set,
   53.17 +                    [['dom', 'int'],
   53.18 +                     ['weight', 'int']])
   53.19 +        val = fn(req.args, {'dom': self.dom.domid})
   53.20 +        return val
   53.21  
   53.22      def op_maxmem_set(self, _, req):
   53.23          fn = FormFn(self.xd.domain_maxmem_set,
    54.1 --- a/tools/python/xen/xend/xenstore/xstransact.py	Thu May 25 15:59:18 2006 -0600
    54.2 +++ b/tools/python/xen/xend/xenstore/xstransact.py	Fri May 26 13:41:49 2006 -0600
    54.3 @@ -221,6 +221,34 @@ class xstransact:
    54.4                  xshandle().mkdir(self.transaction, self.prependPath(key))
    54.5  
    54.6  
    54.7 +    def get_permissions(self, *args):
    54.8 +        """If no arguments are given, return the permissions at this
    54.9 +        transaction's path.  If one argument is given, treat that argument as
   54.10 +        a subpath to this transaction's path, and return the permissions at
   54.11 +        that path.  Otherwise, treat each argument as a subpath to this
   54.12 +        transaction's path, and return a list composed of the permissions at
   54.13 +        each of those instead.
   54.14 +        """
   54.15 +        if len(args) == 0:
   54.16 +            return xshandle().get_permissions(self.transaction, self.path)
   54.17 +        if len(args) == 1:
   54.18 +            return self._get_permissions(args[0])
   54.19 +        ret = []
   54.20 +        for key in args:
   54.21 +            ret.append(self._get_permissions(key))
   54.22 +        return ret
   54.23 +
   54.24 +
   54.25 +    def _get_permissions(self, key):
   54.26 +        path = self.prependPath(key)
   54.27 +        try:
   54.28 +            return xshandle().get_permissions(self.transaction, path)
   54.29 +        except RuntimeError, ex:
   54.30 +            raise RuntimeError(ex.args[0],
   54.31 +                               '%s, while getting permissions from %s' %
   54.32 +                               (ex.args[1], path))
   54.33 +
   54.34 +
   54.35      def set_permissions(self, *args):
   54.36          if len(args) == 0:
   54.37              raise TypeError
    55.1 --- a/tools/python/xen/xm/main.py	Thu May 25 15:59:18 2006 -0600
    55.2 +++ b/tools/python/xen/xm/main.py	Fri May 26 13:41:49 2006 -0600
    55.3 @@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
    55.4                                      specifies another way of setting a domain's\n\
    55.5                                      cpu period/slice."
    55.6  
    55.7 +csched_help = "csched                           Set or get credit scheduler parameters"
    55.8  block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
    55.9                  [BackDomId]         Create a new virtual block device"""
   55.10  block_detach_help = """block-detach  <DomId> <DevId>    Destroy a domain's virtual block device,
   55.11 @@ -174,6 +175,7 @@ host_commands = [
   55.12      ]
   55.13  
   55.14  scheduler_commands = [
   55.15 +    "csched",
   55.16      "sched-bvt",
   55.17      "sched-bvt-ctxallow",
   55.18      "sched-sedf",
   55.19 @@ -735,6 +737,48 @@ def xm_sched_sedf(args):
   55.20          else:
   55.21              print_sedf(sedf_info)
   55.22  
   55.23 +def xm_csched(args):
   55.24 +    usage_msg = """Csched:     Set or get credit scheduler parameters
   55.25 + Usage:
   55.26 +
   55.27 +        csched -d domain [-w weight] [-c cap]
   55.28 +    """
   55.29 +    try:
   55.30 +        opts, args = getopt.getopt(args[0:], "d:w:c:",
   55.31 +            ["domain=", "weight=", "cap="])
   55.32 +    except getopt.GetoptError:
   55.33 +        # print help information and exit:
   55.34 +        print usage_msg
   55.35 +        sys.exit(1)
   55.36 +
   55.37 +    domain = None
   55.38 +    weight = None
   55.39 +    cap = None
   55.40 +
   55.41 +    for o, a in opts:
   55.42 +        if o == "-d":
   55.43 +            domain = a
   55.44 +        elif o == "-w":
   55.45 +            weight = int(a)
   55.46 +        elif o == "-c":
   55.47 +            cap = int(a)
   55.48 +
   55.49 +    if domain is None:
   55.50 +        # placeholder for system-wide scheduler parameters
   55.51 +        print usage_msg
   55.52 +        sys.exit(1)
   55.53 +
   55.54 +    if weight is None and cap is None:
   55.55 +        print server.xend.domain.csched_get(domain)
   55.56 +    else:
   55.57 +        if weight is None:
   55.58 +            weight = int(0)
   55.59 +        if cap is None:
   55.60 +            cap = int(~0)
   55.61 +
   55.62 +        err = server.xend.domain.csched_set(domain, weight, cap)
   55.63 +        if err != 0:
   55.64 +            print err
   55.65  
   55.66  def xm_info(args):
   55.67      arg_check(args, "info", 0)
   55.68 @@ -1032,6 +1076,7 @@ commands = {
   55.69      "sched-bvt": xm_sched_bvt,
   55.70      "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
   55.71      "sched-sedf": xm_sched_sedf,
   55.72 +    "csched": xm_csched,
   55.73      # block
   55.74      "block-attach": xm_block_attach,
   55.75      "block-detach": xm_block_detach,
    56.1 --- a/tools/tests/test_x86_emulator.c	Thu May 25 15:59:18 2006 -0600
    56.2 +++ b/tools/tests/test_x86_emulator.c	Fri May 26 13:41:49 2006 -0600
    56.3 @@ -17,7 +17,8 @@ typedef int64_t            s64;
    56.4  static int read_any(
    56.5      unsigned long addr,
    56.6      unsigned long *val,
    56.7 -    unsigned int bytes)
    56.8 +    unsigned int bytes,
    56.9 +    struct x86_emulate_ctxt *ctxt)
   56.10  {
   56.11      switch ( bytes )
   56.12      {
   56.13 @@ -32,7 +33,8 @@ static int read_any(
   56.14  static int write_any(
   56.15      unsigned long addr,
   56.16      unsigned long val,
   56.17 -    unsigned int bytes)
   56.18 +    unsigned int bytes,
   56.19 +    struct x86_emulate_ctxt *ctxt)
   56.20  {
   56.21      switch ( bytes )
   56.22      {
   56.23 @@ -48,7 +50,8 @@ static int cmpxchg_any(
   56.24      unsigned long addr,
   56.25      unsigned long old,
   56.26      unsigned long new,
   56.27 -    unsigned int bytes)
   56.28 +    unsigned int bytes,
   56.29 +    struct x86_emulate_ctxt *ctxt)
   56.30  {
   56.31      switch ( bytes )
   56.32      {
   56.33 @@ -65,34 +68,38 @@ static int cmpxchg8b_any(
   56.34      unsigned long old_lo,
   56.35      unsigned long old_hi,
   56.36      unsigned long new_lo,
   56.37 -    unsigned long new_hi)
   56.38 +    unsigned long new_hi,
   56.39 +    struct x86_emulate_ctxt *ctxt)
   56.40  {
   56.41      ((unsigned long *)addr)[0] = new_lo;
   56.42      ((unsigned long *)addr)[1] = new_hi;
   56.43      return X86EMUL_CONTINUE;
   56.44  }
   56.45  
   56.46 -static struct x86_mem_emulator emulops = {
   56.47 +static struct x86_emulate_ops emulops = {
   56.48      read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
   56.49  };
   56.50  
   56.51  int main(int argc, char **argv)
   56.52  {
   56.53 +    struct x86_emulate_ctxt ctxt;
   56.54      struct cpu_user_regs regs;
   56.55      char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
   56.56      unsigned int res = 0x7FFFFFFF;
   56.57      u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
   56.58 -    unsigned long cr2;
   56.59      int rc;
   56.60  
   56.61 +    ctxt.regs = &regs;
   56.62 +    ctxt.mode = X86EMUL_MODE_PROT32;
   56.63 +
   56.64      printf("%-40s", "Testing addl %%ecx,(%%eax)...");
   56.65      instr[0] = 0x01; instr[1] = 0x08;
   56.66      regs.eflags = 0x200;
   56.67      regs.eip    = (unsigned long)&instr[0];
   56.68      regs.ecx    = 0x12345678;
   56.69 -    cr2         = (unsigned long)&res;
   56.70 +    ctxt.cr2    = (unsigned long)&res;
   56.71      res         = 0x7FFFFFFF;
   56.72 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   56.73 +    rc = x86_emulate_memop(&ctxt, &emulops);
   56.74      if ( (rc != 0) || 
   56.75           (res != 0x92345677) || 
   56.76           (regs.eflags != 0xa94) ||
   56.77 @@ -109,8 +116,8 @@ int main(int argc, char **argv)
   56.78  #else
   56.79      regs.ecx    = 0x12345678UL;
   56.80  #endif
   56.81 -    cr2         = (unsigned long)&res;
   56.82 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
   56.83 +    ctxt.cr2    = (unsigned long)&res;
   56.84 +    rc = x86_emulate_memop(&ctxt, &emulops);
   56.85      if ( (rc != 0) || 
   56.86           (res != 0x92345677) || 
   56.87           (regs.ecx != 0x8000000FUL) ||
   56.88 @@ -124,8 +131,8 @@ int main(int argc, char **argv)
   56.89      regs.eip    = (unsigned long)&instr[0];
   56.90      regs.eax    = 0x92345677UL;
   56.91      regs.ecx    = 0xAA;
   56.92 -    cr2         = (unsigned long)&res;
   56.93 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
   56.94 +    ctxt.cr2    = (unsigned long)&res;
   56.95 +    rc = x86_emulate_memop(&ctxt, &emulops);
   56.96      if ( (rc != 0) || 
   56.97           (res != 0x923456AA) || 
   56.98           (regs.eflags != 0x244) ||
   56.99 @@ -140,8 +147,8 @@ int main(int argc, char **argv)
  56.100      regs.eip    = (unsigned long)&instr[0];
  56.101      regs.eax    = 0xAABBCC77UL;
  56.102      regs.ecx    = 0xFF;
  56.103 -    cr2         = (unsigned long)&res;
  56.104 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
  56.105 +    ctxt.cr2    = (unsigned long)&res;
  56.106 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.107      if ( (rc != 0) || 
  56.108           (res != 0x923456AA) || 
  56.109           ((regs.eflags&0x240) != 0x200) ||
  56.110 @@ -156,8 +163,8 @@ int main(int argc, char **argv)
  56.111      regs.eflags = 0x200;
  56.112      regs.eip    = (unsigned long)&instr[0];
  56.113      regs.ecx    = 0x12345678;
  56.114 -    cr2         = (unsigned long)&res;
  56.115 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
  56.116 +    ctxt.cr2    = (unsigned long)&res;
  56.117 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.118      if ( (rc != 0) || 
  56.119           (res != 0x12345678) || 
  56.120           (regs.eflags != 0x200) ||
  56.121 @@ -173,8 +180,8 @@ int main(int argc, char **argv)
  56.122      regs.eip    = (unsigned long)&instr[0];
  56.123      regs.eax    = 0x923456AAUL;
  56.124      regs.ecx    = 0xDDEEFF00L;
  56.125 -    cr2         = (unsigned long)&res;
  56.126 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
  56.127 +    ctxt.cr2    = (unsigned long)&res;
  56.128 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.129      if ( (rc != 0) || 
  56.130           (res != 0xDDEEFF00) || 
  56.131           (regs.eflags != 0x244) ||
  56.132 @@ -192,8 +199,8 @@ int main(int argc, char **argv)
  56.133      regs.esi    = (unsigned long)&res + 0;
  56.134      regs.edi    = (unsigned long)&res + 2;
  56.135      regs.error_code = 0; /* read fault */
  56.136 -    cr2         = regs.esi;
  56.137 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
  56.138 +    ctxt.cr2    = regs.esi;
  56.139 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.140      if ( (rc != 0) || 
  56.141           (res != 0x44554455) ||
  56.142           (regs.eflags != 0x200) ||
  56.143 @@ -210,8 +217,8 @@ int main(int argc, char **argv)
  56.144      regs.eflags = 0x200;
  56.145      regs.eip    = (unsigned long)&instr[0];
  56.146      regs.edi    = (unsigned long)&res;
  56.147 -    cr2         = regs.edi;
  56.148 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
  56.149 +    ctxt.cr2    = regs.edi;
  56.150 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.151      if ( (rc != 0) || 
  56.152           (res != 0x2233445D) ||
  56.153           ((regs.eflags&0x201) != 0x201) ||
  56.154 @@ -228,8 +235,8 @@ int main(int argc, char **argv)
  56.155      regs.ecx    = 0xCCCCFFFF;
  56.156      regs.eip    = (unsigned long)&instr[0];
  56.157      regs.edi    = (unsigned long)cmpxchg8b_res;
  56.158 -    cr2         = regs.edi;
  56.159 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
  56.160 +    ctxt.cr2    = regs.edi;
  56.161 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.162      if ( (rc != 0) || 
  56.163           (cmpxchg8b_res[0] != 0x9999AAAA) ||
  56.164           (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
  56.165 @@ -242,8 +249,8 @@ int main(int argc, char **argv)
  56.166      instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
  56.167      regs.eip    = (unsigned long)&instr[0];
  56.168      regs.edi    = (unsigned long)cmpxchg8b_res;
  56.169 -    cr2         = regs.edi;
  56.170 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
  56.171 +    ctxt.cr2    = regs.edi;
  56.172 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.173      if ( (rc != 0) || 
  56.174           (cmpxchg8b_res[0] != 0x9999AAAA) ||
  56.175           (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
  56.176 @@ -258,9 +265,9 @@ int main(int argc, char **argv)
  56.177      instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
  56.178      regs.eip    = (unsigned long)&instr[0];
  56.179      regs.ecx    = 0x12345678;
  56.180 -    cr2         = (unsigned long)&res;
  56.181 +    ctxt.cr2    = (unsigned long)&res;
  56.182      res         = 0x82;
  56.183 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
  56.184 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.185      if ( (rc != 0) ||
  56.186           (res != 0x82) ||
  56.187           (regs.ecx != 0xFFFFFF82) ||
  56.188 @@ -273,9 +280,9 @@ int main(int argc, char **argv)
  56.189      instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
  56.190      regs.eip    = (unsigned long)&instr[0];
  56.191      regs.ecx    = 0x12345678;
  56.192 -    cr2         = (unsigned long)&res;
  56.193 +    ctxt.cr2    = (unsigned long)&res;
  56.194      res         = 0x1234aa82;
  56.195 -    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
  56.196 +    rc = x86_emulate_memop(&ctxt, &emulops);
  56.197      if ( (rc != 0) ||
  56.198           (res != 0x1234aa82) ||
  56.199           (regs.ecx != 0xaa82) ||
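
The hunks above fold the emulator's per-call arguments (register file, faulting address, execution mode) into a single context object, so each test case only refreshes the fields that change. A minimal sketch of the new convention; the struct shape is inferred from these call sites, and the real struct x86_emulate_ctxt in xen/include/asm-x86/x86_emulate.h may carry more state:

    /* Inferred shape; illustrative only. */
    struct x86_emulate_ctxt {
        struct cpu_user_regs *regs; /* register file, shared with caller */
        unsigned long         cr2;  /* faulting linear address */
        int                   mode; /* e.g. X86EMUL_MODE_PROT32, set once */
    };

    /* Setup happens once; per test case only eip and cr2 move: */
    regs.eip = (unsigned long)&instr[0];
    ctxt.cr2 = (unsigned long)&res;
    rc = x86_emulate_memop(&ctxt, &emulops);
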
    57.1 --- a/tools/xenstore/Makefile	Thu May 25 15:59:18 2006 -0600
    57.2 +++ b/tools/xenstore/Makefile	Fri May 26 13:41:49 2006 -0600
    57.3 @@ -27,6 +27,12 @@ CLIENTS := xenstore-exists xenstore-list
    57.4  CLIENTS += xenstore-write
    57.5  CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
    57.6  
    57.7 +XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
    57.8 +
    57.9 +XENSTORED_Linux = xenstored_linux.o
   57.10 +
   57.11 +XENSTORED_OBJS += $(XENSTORED_$(OS))
   57.12 +
   57.13  .PHONY: all
   57.14  all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
   57.15  
   57.16 @@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
   57.17  .PHONY: testcode
   57.18  testcode: xs_test xenstored_test xs_random
   57.19  
   57.20 -xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
   57.21 +xenstored: $(XENSTORED_OBJS)
   57.22  	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
   57.23  
   57.24  $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
    58.1 --- a/tools/xenstore/xenstored_core.c	Thu May 25 15:59:18 2006 -0600
    58.2 +++ b/tools/xenstore/xenstored_core.c	Fri May 26 13:41:49 2006 -0600
    58.3 @@ -451,6 +451,11 @@ static struct node *read_node(struct con
    58.4  
    58.5  static bool write_node(struct connection *conn, const struct node *node)
    58.6  {
    58.7 +	/*
    58.8 +	 * conn will be null when this is called from manual_node.
    58.9 +	 * tdb_context copes with this.
   58.10 +	 */
   58.11 +
   58.12  	TDB_DATA key, data;
   58.13  	void *p;
   58.14  
   58.15 @@ -478,7 +483,7 @@ static bool write_node(struct connection
   58.16  
   58.17  	/* TDB should set errno, but doesn't even set ecode AFAICT. */
   58.18  	if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
   58.19 -		corrupt(conn, "Write of %s = %s failed", key, data);
   58.20 +		corrupt(conn, "Write of %s failed", key.dptr);
   58.21  		goto error;
   58.22  	}
   58.23  	return true;
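
The corrupt() fix above addresses a varargs bug: key and data are TDB_DATA values, and passing a whole struct where the format string expects %s is undefined behaviour. For reference, TDB's counted-buffer type looks like this (as in the public tdb.h; shown for illustration):

    typedef struct TDB_DATA {
        unsigned char *dptr;  /* bytes, not necessarily NUL-terminated */
        size_t dsize;
    } TDB_DATA;

key.dptr is safe to print here because write_node() builds the key from a NUL-terminated node name; data.dptr is a binary record, which is presumably why it was dropped from the message altogether.
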
    59.1 --- a/tools/xenstore/xenstored_core.h	Thu May 25 15:59:18 2006 -0600
    59.2 +++ b/tools/xenstore/xenstored_core.h	Fri May 26 13:41:49 2006 -0600
    59.3 @@ -20,6 +20,8 @@
    59.4  #ifndef _XENSTORED_CORE_H
    59.5  #define _XENSTORED_CORE_H
    59.6  
    59.7 +#include <xenctrl.h>
    59.8 +
    59.9  #include <sys/types.h>
   59.10  #include <dirent.h>
   59.11  #include <stdbool.h>
   59.12 @@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
   59.13  
   59.14  extern int event_fd;
   59.15  
   59.16 +/* Map the kernel's xenstore page. */
   59.17 +void *xenbus_map(void);
   59.18 +
   59.19 +/* Return the event channel used by xenbus. */
   59.20 +evtchn_port_t xenbus_evtchn(void);
   59.21 +
   59.22  #endif /* _XENSTORED_CORE_H */
   59.23  
   59.24  /*
    60.1 --- a/tools/xenstore/xenstored_domain.c	Thu May 25 15:59:18 2006 -0600
    60.2 +++ b/tools/xenstore/xenstored_domain.c	Fri May 26 13:41:49 2006 -0600
    60.3 @@ -33,12 +33,11 @@
    60.4  #include "talloc.h"
    60.5  #include "xenstored_core.h"
    60.6  #include "xenstored_domain.h"
    60.7 -#include "xenstored_proc.h"
    60.8  #include "xenstored_watch.h"
    60.9  #include "xenstored_test.h"
   60.10  
   60.11  #include <xenctrl.h>
   60.12 -#include <xen/linux/evtchn.h>
   60.13 +#include <xen/sys/evtchn.h>
   60.14  
   60.15  static int *xc_handle;
   60.16  static evtchn_port_t virq_port;
   60.17 @@ -476,44 +475,24 @@ void restore_existing_connections(void)
   60.18  
   60.19  static int dom0_init(void) 
   60.20  { 
   60.21 -	int rc, fd;
   60.22 -	evtchn_port_t port; 
   60.23 -	char str[20]; 
   60.24 -	struct domain *dom0; 
   60.25 +	evtchn_port_t port;
   60.26 +	struct domain *dom0;
   60.27  
   60.28 -	fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
   60.29 -	if (fd == -1)
   60.30 +	port = xenbus_evtchn();
   60.31 +	if (port == -1)
   60.32  		return -1;
   60.33  
   60.34 -	rc = read(fd, str, sizeof(str)); 
   60.35 -	if (rc == -1)
   60.36 -		goto outfd;
   60.37 -	str[rc] = '\0'; 
   60.38 -	port = strtoul(str, NULL, 0); 
   60.39 -
   60.40 -	close(fd); 
   60.41 -
   60.42  	dom0 = new_domain(NULL, 0, port); 
   60.43  
   60.44 -	fd = open(XENSTORED_PROC_KVA, O_RDWR);
   60.45 -	if (fd == -1)
   60.46 +	dom0->interface = xenbus_map();
   60.47 +	if (dom0->interface == NULL)
   60.48  		return -1;
   60.49  
   60.50 -	dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
   60.51 -			       MAP_SHARED, fd, 0);
   60.52 -	if (dom0->interface == MAP_FAILED)
   60.53 -		goto outfd;
   60.54 -
   60.55 -	close(fd);
   60.56 -
   60.57  	talloc_steal(dom0->conn, dom0); 
   60.58  
   60.59  	evtchn_notify(dom0->port); 
   60.60  
   60.61  	return 0; 
   60.62 -outfd:
   60.63 -	close(fd);
   60.64 -	return -1;
   60.65  }
   60.66  
   60.67  
    61.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    61.2 +++ b/tools/xenstore/xenstored_linux.c	Fri May 26 13:41:49 2006 -0600
    61.3 @@ -0,0 +1,70 @@
    61.4 +/******************************************************************************
    61.5 + *
    61.6 + * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
    61.7 + * Use is subject to license terms.
    61.8 + *
    61.9 + * Copyright (C) 2005 Rusty Russell IBM Corporation
   61.10 + *
   61.11 + * This program is free software; you can redistribute it and/or
   61.12 + * modify it under the terms of the GNU General Public License as
   61.13 + * published by the Free Software Foundation, version 2 of the
   61.14 + * License.
   61.15 + */
   61.16 +
   61.17 +#include <errno.h>
   61.17 +#include <fcntl.h>
   61.18 +#include <unistd.h>
   61.19 +#include <stdlib.h>
   61.20 +#include <sys/mman.h>
   61.21 +
   61.22 +#include "xenstored_core.h"
   61.23 +
   61.24 +#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
   61.25 +#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
   61.26 +
   61.27 +evtchn_port_t xenbus_evtchn(void)
   61.28 +{
   61.29 +	int fd;
   61.30 +	int rc;
   61.31 +	evtchn_port_t port; 
   61.32 +	char str[20]; 
   61.33 +
   61.34 +	fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
   61.35 +	if (fd == -1)
   61.36 +		return -1;
   61.37 +
   61.38 +	rc = read(fd, str, sizeof(str) - 1); 
   61.39 +	if (rc == -1)
   61.40 +	{
   61.41 +		int err = errno;
   61.42 +		close(fd);
   61.43 +		errno = err;
   61.44 +		return -1;
   61.45 +	}
   61.46 +
   61.47 +	str[rc] = '\0'; 
   61.48 +	port = strtoul(str, NULL, 0); 
   61.49 +
   61.50 +	close(fd); 
   61.51 +	return port;
   61.52 +}
   61.53 +
   61.54 +void *xenbus_map(void)
   61.55 +{
   61.56 +	int fd;
   61.57 +	void *addr;
   61.58 +
   61.59 +	fd = open(XENSTORED_PROC_KVA, O_RDWR);
   61.60 +	if (fd == -1)
   61.61 +		return NULL;
   61.62 +
   61.63 +	addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
   61.64 +		MAP_SHARED, fd, 0);
   61.65 +
   61.66 +	if (addr == MAP_FAILED)
   61.67 +		addr = NULL;
   61.68 +
   61.69 +	close(fd);
   61.70 +
   61.71 +	return addr;
   61.72 +}
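
With the /proc constants folded into this file, xenstored_linux.c becomes the first OS-specific backend, selected at build time by the XENSTORED_$(OS) variable added to the Makefile above. A port to another OS only needs to supply these two functions; a hypothetical skeleton (file name and contents made up for illustration):

    /* xenstored_foo.c: hypothetical non-Linux backend. */
    #include <stddef.h>

    #include "xenstored_core.h"

    evtchn_port_t xenbus_evtchn(void)
    {
        /* Return the event channel the kernel set up for xenstore,
         * or -1 on failure. */
        return -1;   /* stub */
    }

    void *xenbus_map(void)
    {
        /* Return a read/write mapping of the kernel's xenstore page,
         * or NULL on failure. */
        return NULL; /* stub */
    }
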
    62.1 --- a/tools/xenstore/xenstored_proc.h	Thu May 25 15:59:18 2006 -0600
    62.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.3 @@ -1,27 +0,0 @@
    62.4 -/* 
    62.5 -    Copyright (C) 2005 XenSource Ltd
    62.6 -
    62.7 -    This program is free software; you can redistribute it and/or modify
    62.8 -    it under the terms of the GNU General Public License as published by
    62.9 -    the Free Software Foundation; either version 2 of the License, or
   62.10 -    (at your option) any later version.
   62.11 -
   62.12 -    This program is distributed in the hope that it will be useful,
   62.13 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   62.14 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   62.15 -    GNU General Public License for more details.
   62.16 -
   62.17 -    You should have received a copy of the GNU General Public License
   62.18 -    along with this program; if not, write to the Free Software
   62.19 -    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   62.20 -
   62.21 -*/
   62.22 -
   62.23 -#ifndef _XENSTORED_PROC_H
   62.24 -#define _XENSTORED_PROC_H
   62.25 -
   62.26 -#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
   62.27 -#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
   62.28 -
   62.29 -
   62.30 -#endif /* _XENSTORED_PROC_H */
    63.1 --- a/xen/arch/x86/domain_build.c	Thu May 25 15:59:18 2006 -0600
    63.2 +++ b/xen/arch/x86/domain_build.c	Fri May 26 13:41:49 2006 -0600
    63.3 @@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
    63.4      if ( (1UL << order) > nr_pages )
    63.5          panic("Domain 0 allocation is too small for kernel image.\n");
    63.6  
    63.7 -    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
    63.8 +    /*
    63.9 +     * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
   63.10 +     * mapping covers the allocation.
   63.11 +     */
   63.12      if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
   63.13          panic("Not enough RAM for domain 0 allocation.\n");
   63.14      alloc_spfn = page_to_mfn(page);
    64.1 --- a/xen/arch/x86/hvm/hvm.c	Thu May 25 15:59:18 2006 -0600
    64.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri May 26 13:41:49 2006 -0600
    64.3 @@ -185,8 +185,9 @@ static void hvm_get_info(struct domain *
    64.4  void hvm_setup_platform(struct domain* d)
    64.5  {
    64.6      struct hvm_domain *platform;
    64.7 +    struct vcpu *v = current;
    64.8  
    64.9 -    if ( !hvm_guest(current) || (current->vcpu_id != 0) )
   64.10 +    if ( !hvm_guest(v) || (v->vcpu_id != 0) )
   64.11          return;
   64.12  
   64.13      if ( shadow_direct_map_init(d) == 0 )
   64.14 @@ -208,7 +209,8 @@ void hvm_setup_platform(struct domain* d
   64.15          hvm_vioapic_init(d);
   64.16      }
   64.17  
   64.18 -    pit_init(&platform->vpit, current);
   64.19 +    init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor);
   64.20 +    pit_init(v, cpu_khz);
   64.21  }
   64.22  
   64.23  void pic_irq_request(void *data, int level)
   64.24 @@ -240,6 +242,14 @@ void hvm_pic_assist(struct vcpu *v)
   64.25      }
   64.26  }
   64.27  
   64.28 +u64 hvm_get_guest_time(struct vcpu *v)
   64.29 +{
   64.30 +    u64    host_tsc;
   64.31 +    
   64.32 +    rdtscll(host_tsc);
   64.33 +    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
   64.34 +}
   64.35 +
   64.36  int cpu_get_interrupt(struct vcpu *v, int *type)
   64.37  {
   64.38      int intno;
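
The new hvm_get_guest_time() makes the per-VCPU TSC offset the single source of guest time: the guest clock is the host TSC plus a cached, signed offset. A standalone illustration of that invariant (struct and names simplified; Xen keeps the offset in v->arch.hvm_vcpu.cache_tsc_offset):

    #include <stdint.h>

    struct toy_vcpu { int64_t cache_tsc_offset; };

    /* hvm_get_guest_time(): guest TSC = host TSC + offset. */
    static uint64_t get_guest_time(struct toy_vcpu *v, uint64_t host_tsc)
    {
        return host_tsc + v->cache_tsc_offset;
    }

    /* set_guest_time()/svm_set_guest_time(): pick the offset so the
     * next read continues from the requested guest time. */
    static void set_guest_time(struct toy_vcpu *v, uint64_t gtime,
                               uint64_t host_tsc)
    {
        v->cache_tsc_offset = (int64_t)(gtime - host_tsc);
    }
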
    65.1 --- a/xen/arch/x86/hvm/i8254.c	Thu May 25 15:59:18 2006 -0600
    65.2 +++ b/xen/arch/x86/hvm/i8254.c	Fri May 26 13:41:49 2006 -0600
    65.3 @@ -22,11 +22,10 @@
    65.4   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    65.5   * THE SOFTWARE.
    65.6   */
    65.7 -/* Edwin Zhai <edwin.zhai@intel.com>
    65.8 +/* Edwin Zhai <edwin.zhai@intel.com>, Eddie Dong <eddie.dong@intel.com>
    65.9   * Ported to xen:
   65.10 - * use actimer for intr generation;
   65.11 + * Add a new layer of periodic time on top of PIT;
   65.12   * move speaker io access to hypervisor;
   65.13 - * use new method for counter/intrs calculation
   65.14   */
   65.15  
   65.16  #include <xen/config.h>
   65.17 @@ -42,184 +41,117 @@
   65.18  #include <asm/hvm/vpit.h>
   65.19  #include <asm/current.h>
   65.20  
   65.21 -/*#define DEBUG_PIT*/
   65.22 +/* Enabling DEBUG_PIT may cause guest calibration inaccuracy */
   65.23 +/* #define DEBUG_PIT */
   65.24  
   65.25  #define RW_STATE_LSB 1
   65.26  #define RW_STATE_MSB 2
   65.27  #define RW_STATE_WORD0 3
   65.28  #define RW_STATE_WORD1 4
   65.29  
   65.30 -#ifndef NSEC_PER_SEC
   65.31 -#define NSEC_PER_SEC (1000000000ULL)
   65.32 -#endif
   65.33 +#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
   65.34 +static int handle_pit_io(ioreq_t *p);
   65.35 +static int handle_speaker_io(ioreq_t *p);
   65.36  
   65.37 -#ifndef TIMER_SLOP 
   65.38 -#define TIMER_SLOP (50*1000) /* ns */
   65.39 -#endif
   65.40 +/* compute with 96 bit intermediate result: (a*b)/c */
   65.41 +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
   65.42 +{
   65.43 +    union {
   65.44 +        uint64_t ll;
   65.45 +        struct {
   65.46 +#ifdef WORDS_BIGENDIAN
   65.47 +            uint32_t high, low;
   65.48 +#else
   65.49 +            uint32_t low, high;
   65.50 +#endif            
   65.51 +        } l;
   65.52 +    } u, res;
   65.53 +    uint64_t rl, rh;
   65.54  
   65.55 -static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
   65.56 +    u.ll = a;
   65.57 +    rl = (uint64_t)u.l.low * (uint64_t)b;
   65.58 +    rh = (uint64_t)u.l.high * (uint64_t)b;
   65.59 +    rh += (rl >> 32);
   65.60 +    res.l.high = rh / c;
   65.61 +    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
   65.62 +    return res.ll;
   65.63 +}
   65.64  
   65.65 -s_time_t hvm_get_clock(void)
   65.66 +/*
   65.67 + * get guest time.
   65.68 + * unit: TSC ticks
   65.69 + */
   65.70 +int64_t hvm_get_clock(struct vcpu *v)
   65.71  {
   65.72 -    /* TODO: add pause/unpause support */
   65.73 -    return NOW();
   65.74 +    uint64_t  gtsc;
   65.75 +    gtsc = hvm_get_guest_time(v);
   65.76 +    return gtsc;
   65.77  }
   65.78  
   65.79  static int pit_get_count(PITChannelState *s)
   65.80  {
   65.81 -    u64 d;
   65.82 -    u64 counter;
   65.83 +    uint64_t d;
   65.84 +    int  counter;
   65.85  
   65.86 -    d = hvm_get_clock() - s->count_load_time;
   65.87 +    d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
   65.88      switch(s->mode) {
   65.89      case 0:
   65.90      case 1:
   65.91      case 4:
   65.92      case 5:
   65.93 -        counter = (s->period - d) & 0xffff;
   65.94 +        counter = (s->count - d) & 0xffff;
   65.95          break;
   65.96      case 3:
   65.97          /* XXX: may be incorrect for odd counts */
   65.98 -        counter = s->period - ((2 * d) % s->period);
   65.99 +        counter = s->count - ((2 * d) % s->count);
  65.100          break;
  65.101      default:
  65.102 -        /* mod 2 counter handle */
  65.103 -        d = hvm_get_clock() - s->hvm_time->count_point;
  65.104 -        d += s->hvm_time->count_advance;
  65.105 -        counter = s->period - (d % s->period);
  65.106 +        counter = s->count - (d % s->count);
  65.107          break;
  65.108      }
  65.109 -    /* change from ns to pit counter */
  65.110 -    counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
  65.111      return counter;
  65.112  }
  65.113  
  65.114  /* get pit output bit */
  65.115 -static int pit_get_out1(PITChannelState *s, s64 current_time)
  65.116 +static int pit_get_out1(PITChannelState *s, int64_t current_time)
  65.117  {
  65.118 -    u64 d;
  65.119 +    uint64_t d;
  65.120      int out;
  65.121  
  65.122 -    d = current_time - s->count_load_time;
  65.123 +    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
  65.124      switch(s->mode) {
  65.125      default:
  65.126      case 0:
  65.127 -        out = (d >= s->period);
  65.128 +        out = (d >= s->count);
  65.129          break;
  65.130      case 1:
  65.131 -        out = (d < s->period);
  65.132 +        out = (d < s->count);
  65.133          break;
  65.134      case 2:
  65.135 -        /* mod2 out is no meaning, since intr are generated in background */
  65.136 -        if ((d % s->period) == 0 && d != 0)
  65.137 +        if ((d % s->count) == 0 && d != 0)
  65.138              out = 1;
  65.139          else
  65.140              out = 0;
  65.141          break;
  65.142      case 3:
  65.143 -        out = (d % s->period) < ((s->period + 1) >> 1);
  65.144 +        out = (d % s->count) < ((s->count + 1) >> 1);
  65.145          break;
  65.146      case 4:
  65.147      case 5:
  65.148 -        out = (d == s->period);
  65.149 +        out = (d == s->count);
  65.150          break;
  65.151      }
  65.152      return out;
  65.153  }
  65.154  
  65.155 -int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
  65.156 +int pit_get_out(PITState *pit, int channel, int64_t current_time)
  65.157  {
  65.158      PITChannelState *s = &pit->channels[channel];
  65.159      return pit_get_out1(s, current_time);
  65.160  }
  65.161  
  65.162 -static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
  65.163 -{
  65.164 -    struct hvm_time_info *hvm_time = s->hvm_time;
  65.165 -    struct domain *d = (void *) s - 
  65.166 -        offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
  65.167 -
  65.168 -    /* ticks from current time(expected time) to NOW */ 
  65.169 -    int missed_ticks;
  65.170 -    /* current_time is expected time for next intr, check if it's true
  65.171 -     * (actimer has a TIMER_SLOP in advance)
  65.172 -     */
  65.173 -    s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
  65.174 -
  65.175 -    if (missed_time >= 0) {
  65.176 -        missed_ticks = missed_time/(s_time_t)s->period + 1;
  65.177 -        if (test_bit(_DOMF_debugging, &d->domain_flags)) {
  65.178 -            hvm_time->pending_intr_nr++;
  65.179 -        } else {
  65.180 -            hvm_time->pending_intr_nr += missed_ticks;
  65.181 -        }
  65.182 -        s->next_transition_time = current_time + (missed_ticks ) * s->period;
  65.183 -    }
  65.184 -
  65.185 -    return s->next_transition_time;
  65.186 -}
  65.187 -
  65.188 -/* only rearm the actimer when return value > 0
  65.189 - *  -2: init state
  65.190 - *  -1: the mode has expired
  65.191 - *   0: current VCPU is not running
  65.192 - *  >0: the next fired time
  65.193 - */
  65.194 -s64 pit_get_next_transition_time(PITChannelState *s, 
  65.195 -                                            s64 current_time)
  65.196 -{
  65.197 -    s64 d, next_time, base;
  65.198 -    int period2;
  65.199 -    struct hvm_time_info *hvm_time = s->hvm_time;
  65.200 -
  65.201 -    d = current_time - s->count_load_time;
  65.202 -    switch(s->mode) {
  65.203 -    default:
  65.204 -    case 0:
  65.205 -    case 1:
  65.206 -        if (d < s->period)
  65.207 -            next_time = s->period;
  65.208 -        else
  65.209 -            return -1;
  65.210 -        break;
  65.211 -    case 2:
  65.212 -        next_time = missed_ticks(s, current_time);
  65.213 -        if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
  65.214 -            return 0;
  65.215 -        break;
  65.216 -    case 3:
  65.217 -        base = (d / s->period) * s->period;
  65.218 -        period2 = ((s->period + 1) >> 1);
  65.219 -        if ((d - base) < period2) 
  65.220 -            next_time = base + period2;
  65.221 -        else
  65.222 -            next_time = base + s->period;
  65.223 -        break;
  65.224 -    case 4:
  65.225 -    case 5:
  65.226 -        if (d < s->period)
  65.227 -            next_time = s->period;
  65.228 -        else if (d == s->period)
  65.229 -            next_time = s->period + 1;
  65.230 -        else
  65.231 -            return -1;
  65.232 -        break;
  65.233 -    case 0xff:
  65.234 -        return -2;      /* for init state */ 
  65.235 -        break;
  65.236 -    }
  65.237 -    /* XXX: better solution: use a clock at PIT_FREQ Hz */
  65.238 -    if (next_time <= current_time){
  65.239 -#ifdef DEBUG_PIT
  65.240 -        printk("HVM_PIT:next_time <= current_time. next=0x%llx, current=0x%llx!\n",next_time, current_time);
  65.241 -#endif
  65.242 -        next_time = current_time + 1;
  65.243 -    }
  65.244 -    return next_time;
  65.245 -}
  65.246 -
  65.247  /* val must be 0 or 1 */
  65.248 -void pit_set_gate(hvm_virpit *pit, int channel, int val)
  65.249 +void pit_set_gate(PITState *pit, int channel, int val)
  65.250  {
  65.251      PITChannelState *s = &pit->channels[channel];
  65.252  
  65.253 @@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
  65.254      case 5:
  65.255          if (s->gate < val) {
  65.256              /* restart counting on rising edge */
  65.257 -            s->count_load_time = hvm_get_clock();
  65.258 -            pit_irq_timer_update(s, s->count_load_time);
  65.259 +            s->count_load_time = hvm_get_clock(s->vcpu);
  65.260 +//            pit_irq_timer_update(s, s->count_load_time);
  65.261          }
  65.262          break;
  65.263      case 2:
  65.264      case 3:
  65.265          if (s->gate < val) {
  65.266              /* restart counting on rising edge */
  65.267 -            s->count_load_time = hvm_get_clock();
  65.268 -            pit_irq_timer_update(s, s->count_load_time);
  65.269 +            s->count_load_time = hvm_get_clock(s->vcpu);
  65.270 +//            pit_irq_timer_update(s, s->count_load_time);
  65.271          }
  65.272          /* XXX: disable/enable counting */
  65.273          break;
  65.274 @@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
  65.275      s->gate = val;
  65.276  }
  65.277  
  65.278 -int pit_get_gate(hvm_virpit *pit, int channel)
  65.279 +int pit_get_gate(PITState *pit, int channel)
  65.280  {
  65.281      PITChannelState *s = &pit->channels[channel];
  65.282      return s->gate;
  65.283 @@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
  65.284  
  65.285  static inline void pit_load_count(PITChannelState *s, int val)
  65.286  {
  65.287 +    u32   period;
  65.288      if (val == 0)
  65.289          val = 0x10000;
  65.290 -
  65.291 -    s->count_load_time = hvm_get_clock();
  65.292 +    s->count_load_time = hvm_get_clock(s->vcpu);
  65.293      s->count = val;
  65.294 -    s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
  65.295 +    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
  65.296  
  65.297  #ifdef DEBUG_PIT
  65.298 -    printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, load_time=%lld\n",
  65.299 +    printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, load_time=%lld\n",
  65.300 +            s,
  65.301              val,
  65.302 -            s->period / 1000,
  65.303 +            period,
  65.304              s->mode,
  65.305 -            s->count_load_time);
  65.306 +            (long long)s->count_load_time);
  65.307  #endif
  65.308  
  65.309 -    if (s->mode == HVM_PIT_ACCEL_MODE) {
  65.310 -        if (!s->hvm_time) {
  65.311 -            printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
  65.312 -            return;
  65.313 -        }
  65.314 -        s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
  65.315 -        s->hvm_time->first_injected = 0;
  65.316 -
  65.317 -        if (s->period < 900000) { /* < 0.9 ms */
  65.318 -            printk("HVM_PIT: guest programmed too small an count: %x\n",
  65.319 -                    s->count);
  65.320 -            s->period = 1000000;
  65.321 -        }
  65.322 +    switch (s->mode) {
  65.323 +        case 2:
  65.324 +            /* create periodic time */
  65.325 +            s->pt = create_periodic_time (s->vcpu, period, 0, 0);
  65.326 +            break;
  65.327 +        case 1:
  65.328 +            /* create one shot time */
  65.329 +            s->pt = create_periodic_time (s->vcpu, period, 0, 1);
  65.330 +#ifdef DEBUG_PIT
  65.331 +            printk("HVM_PIT: create one shot time.\n");
  65.332 +#endif
  65.333 +            break;
  65.334 +        default:
  65.335 +            break;
  65.336      }
  65.337 -        
  65.338 -    pit_irq_timer_update(s, s->count_load_time);
  65.339  }
  65.340  
  65.341  /* if already latched, do not latch again */
  65.342 @@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
  65.343      }
  65.344  }
  65.345  
  65.346 -static void pit_ioport_write(void *opaque, u32 addr, u32 val)
  65.347 +static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
  65.348  {
  65.349 -    hvm_virpit *pit = opaque;
  65.350 +    PITState *pit = opaque;
  65.351      int channel, access;
  65.352      PITChannelState *s;
  65.353      val &= 0xff;
  65.354 @@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
  65.355                      if (!(val & 0x10) && !s->status_latched) {
  65.356                          /* status latch */
  65.357                          /* XXX: add BCD and null count */
  65.358 -                        s->status =  (pit_get_out1(s, hvm_get_clock()) << 7) |
  65.359 +                        s->status =  (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) |
  65.360                              (s->rw_mode << 4) |
  65.361                              (s->mode << 1) |
  65.362                              s->bcd;
  65.363 @@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
  65.364      }
  65.365  }
  65.366  
  65.367 -static u32 pit_ioport_read(void *opaque, u32 addr)
  65.368 +static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
  65.369  {
  65.370 -    hvm_virpit *pit = opaque;
  65.371 +    PITState *pit = opaque;
  65.372      int ret, count;
  65.373      PITChannelState *s;
  65.374      
  65.375 @@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
  65.376      return ret;
  65.377  }
  65.378  
  65.379 -static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
  65.380 -{
  65.381 -    s64 expire_time;
  65.382 -    int irq_level;
  65.383 -    struct vcpu *v = current;
  65.384 -    struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
  65.385 -
  65.386 -    if (!s->hvm_time || s->mode == 0xff)
  65.387 -        return;
  65.388 -
  65.389 -    expire_time = pit_get_next_transition_time(s, current_time);
  65.390 -    /* not generate intr by direct pic_set_irq in mod 2
  65.391 -     * XXX:mod 3 should be same as mod 2
  65.392 -     */
  65.393 -    if (s->mode != HVM_PIT_ACCEL_MODE) {
  65.394 -        irq_level = pit_get_out1(s, current_time);
  65.395 -        pic_set_irq(pic, s->irq, irq_level);
  65.396 -        s->next_transition_time = expire_time;
  65.397 -#ifdef DEBUG_PIT
  65.398 -        printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
  65.399 -                irq_level, 
  65.400 -                (expire_time - current_time));
  65.401 -#endif
  65.402 -    }
  65.403 -
  65.404 -    if (expire_time > 0)
  65.405 -        set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
  65.406 -
  65.407 -}
  65.408 -
  65.409 -static void pit_irq_timer(void *data)
  65.410 -{
  65.411 -    PITChannelState *s = data;
  65.412 -
  65.413 -    pit_irq_timer_update(s, s->next_transition_time);
  65.414 -}
  65.415 -
  65.416  static void pit_reset(void *opaque)
  65.417  {
  65.418 -    hvm_virpit *pit = opaque;
  65.419 +    PITState *pit = opaque;
  65.420      PITChannelState *s;
  65.421      int i;
  65.422  
  65.423      for(i = 0;i < 3; i++) {
  65.424          s = &pit->channels[i];
  65.425 +        if ( s->pt ) {
  65.426 +            destroy_periodic_time(s->pt);
  65.427 +            s->pt = NULL;
  65.428 +        }
  65.429          s->mode = 0xff; /* the init mode */
  65.430          s->gate = (i != 2);
  65.431          pit_load_count(s, 0);
  65.432      }
  65.433  }
  65.434  
  65.435 -/* hvm_io_assist light-weight version, specific to PIT DM */ 
  65.436 -static void resume_pit_io(ioreq_t *p)
  65.437 +void pit_init(struct vcpu *v, unsigned long cpu_khz)
  65.438  {
  65.439 -    struct cpu_user_regs *regs = guest_cpu_user_regs();
  65.440 -    unsigned long old_eax = regs->eax;
  65.441 -    p->state = STATE_INVALID;
  65.442 +    PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
  65.443 +    PITChannelState *s;
  65.444  
  65.445 -    switch(p->size) {
  65.446 -    case 1:
  65.447 -        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
  65.448 -        break;
  65.449 -    case 2:
  65.450 -        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
  65.451 -        break;
  65.452 -    case 4:
  65.453 -        regs->eax = (p->u.data & 0xffffffff);
  65.454 -        break;
  65.455 -    default:
  65.456 -        BUG();
  65.457 -    }
  65.458 +    s = &pit->channels[0];
  65.459 +    /* the timer 0 is connected to an IRQ */
  65.460 +    s->vcpu = v;
  65.461 +    s++; s->vcpu = v;
  65.462 +    s++; s->vcpu = v;
  65.463 +
  65.464 +    register_portio_handler(PIT_BASE, 4, handle_pit_io);
  65.465 +    /* register the speaker port */
  65.466 +    register_portio_handler(0x61, 1, handle_speaker_io);
  65.467 +    ticks_per_sec(v) = cpu_khz * (int64_t)1000; 
  65.468 +#ifdef DEBUG_PIT
  65.469 +    printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
  65.470 +#endif
  65.471 +    pit_reset(pit);
  65.472 +    return;
  65.473  }
  65.474  
  65.475  /* the intercept action for PIT DM retval:0--not handled; 1--handled */  
  65.476 -int handle_pit_io(ioreq_t *p)
  65.477 +static int handle_pit_io(ioreq_t *p)
  65.478  {
  65.479      struct vcpu *v = current;
  65.480 -    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
  65.481 +    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
  65.482  
  65.483      if (p->size != 1 ||
  65.484          p->pdata_valid ||
  65.485 @@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
  65.486      if (p->dir == 0) {/* write */
  65.487          pit_ioport_write(vpit, p->addr, p->u.data);
  65.488      } else if (p->dir == 1) { /* read */
  65.489 -        p->u.data = pit_ioport_read(vpit, p->addr);
  65.490 -        resume_pit_io(p);
  65.491 +        if ( (p->addr & 3) != 3 ) {
  65.492 +            p->u.data = pit_ioport_read(vpit, p->addr);
  65.493 +        } else {
  65.494 +            printk("HVM_PIT: read A1:A0=3!\n");
  65.495 +        }
  65.496      }
  65.497 -
  65.498 -    /* always return 1, since PIT sit in HV now */
  65.499      return 1;
  65.500  }
  65.501  
  65.502  static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
  65.503  {
  65.504 -    hvm_virpit *pit = opaque;
  65.505 -    val &= 0xff;
  65.506 +    PITState *pit = opaque;
  65.507      pit->speaker_data_on = (val >> 1) & 1;
  65.508      pit_set_gate(pit, 2, val & 1);
  65.509  }
  65.510 @@ -527,18 +426,18 @@ static void speaker_ioport_write(void *o
  65.511  static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
  65.512  {
  65.513      int out;
  65.514 -    hvm_virpit *pit = opaque;
  65.515 -    out = pit_get_out(pit, 2, hvm_get_clock());
  65.516 +    PITState *pit = opaque;
  65.517 +    out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
  65.518      pit->dummy_refresh_clock ^= 1;
  65.519  
  65.520      return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
  65.521        (pit->dummy_refresh_clock << 4);
  65.522  }
  65.523  
  65.524 -int handle_speaker_io(ioreq_t *p)
  65.525 +static int handle_speaker_io(ioreq_t *p)
  65.526  {
  65.527      struct vcpu *v = current;
  65.528 -    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
  65.529 +    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
  65.530  
  65.531      if (p->size != 1 ||
  65.532          p->pdata_valid ||
  65.533 @@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
  65.534          speaker_ioport_write(vpit, p->addr, p->u.data);
  65.535      } else if (p->dir == 1) {/* read */
  65.536          p->u.data = speaker_ioport_read(vpit, p->addr);
  65.537 -        resume_pit_io(p);
  65.538      }
  65.539  
  65.540      return 1;
  65.541  }
  65.542 -
  65.543 -/* pick up missed timer ticks at deactive time */
  65.544 -void pickup_deactive_ticks(struct hvm_virpit *vpit)
  65.545 -{
  65.546 -    s64 next_time;
  65.547 -    PITChannelState *s = &(vpit->channels[0]);
  65.548 -    if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
  65.549 -        next_time = pit_get_next_transition_time(s, s->next_transition_time); 
  65.550 -        if (next_time >= 0)
  65.551 -            set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
  65.552 -    }
  65.553 -}
  65.554 -
  65.555 -void pit_init(struct hvm_virpit *pit, struct vcpu *v)
  65.556 -{
  65.557 -    PITChannelState *s;
  65.558 -    struct hvm_time_info *hvm_time;
  65.559 -
  65.560 -    s = &pit->channels[0];
  65.561 -    /* the timer 0 is connected to an IRQ */
  65.562 -    s->irq = 0;
  65.563 -    /* channel 0 need access the related time info for intr injection */
  65.564 -    hvm_time = s->hvm_time = &pit->time_info;
  65.565 -    hvm_time->vcpu = v;
  65.566 -
  65.567 -    init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
  65.568 -
  65.569 -    register_portio_handler(PIT_BASE, 4, handle_pit_io);
  65.570 -
  65.571 -    /* register the speaker port */
  65.572 -    register_portio_handler(0x61, 1, handle_speaker_io);
  65.573 -
  65.574 -    pit_reset(pit);
  65.575 -
  65.576 -    return;
  65.577 -
  65.578 -}
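
muldiv64() above is the linchpin of the rewritten PIT: counter and output-bit calculations are now derived from elapsed guest TSC cycles as (delta * PIT_FREQ) / ticks_per_sec, which needs a 96-bit intermediate to avoid overflow. A standalone check of the same algorithm, with the endian union replaced by shifts (algebraically identical), taking PIT_FREQ as 1193181 Hz:

    #include <stdio.h>
    #include <stdint.h>

    /* (a*b)/c with a 96-bit intermediate, 64-bit arithmetic only. */
    static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
    {
        uint64_t rl = (uint32_t)a * (uint64_t)b;
        uint64_t rh = (a >> 32) * (uint64_t)b + (rl >> 32);
        return ((rh / c) << 32) + ((((rh % c) << 32) | (uint32_t)rl) / c);
    }

    int main(void)
    {
        /* One second of a 2GHz TSC is exactly one second of PIT ticks. */
        printf("%llu\n", (unsigned long long)
               muldiv64(2000000000ULL, 1193181, 2000000000u));
        return 0;   /* prints 1193181 */
    }
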
    66.1 --- a/xen/arch/x86/hvm/intercept.c	Thu May 25 15:59:18 2006 -0600
    66.2 +++ b/xen/arch/x86/hvm/intercept.c	Fri May 26 13:41:49 2006 -0600
    66.3 @@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
    66.4      evtchn_set_pending(v, iopacket_port(v));
    66.5  }
    66.6  
    66.7 +static __inline__ void missed_ticks(struct periodic_time *pt)
    66.8 +{
    66.9 +    int missed_ticks;
   66.10 +
   66.11 +    missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
   66.12 +    if ( missed_ticks++ >= 0 ) {
   66.13 +        if ( missed_ticks > 1000 ) {
   66.14 +            /* TODO: Adjust guest time together */
   66.15 +            pt->pending_intr_nr++;
   66.16 +        }
   66.17 +        else {
   66.18 +            pt->pending_intr_nr += missed_ticks;
   66.19 +        }
   66.20 +        pt->scheduled += missed_ticks * pt->period;
   66.21 +    }
   66.22 +}
   66.23 +
   66.24 +/* hook function for the platform periodic time */
   66.25 +void pt_timer_fn(void *data)
   66.26 +{
   66.27 +    struct vcpu *v = data;
   66.28 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   66.29 +
   66.30 +    /* pick up missed timer tick */
   66.31 +    missed_ticks(pt);
   66.32 +    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
   66.33 +        set_timer(&pt->timer, pt->scheduled);
   66.34 +    }
   66.35 +}
   66.36 +
   66.37 +/* pick up missed timer ticks at deactivation time */
   66.38 +void pickup_deactive_ticks(struct periodic_time *pt)
   66.39 +{
   66.40 +    if ( !active_timer(&(pt->timer)) ) {
   66.41 +        missed_ticks(pt);
   66.42 +        set_timer(&pt->timer, pt->scheduled);
   66.43 +    }
   66.44 +}
   66.45 +
   66.46 +/*
   66.47 + * period: tick period, in ns.
   66.48 + */
   66.49 +struct periodic_time * create_periodic_time(
   66.50 +        struct vcpu *v, 
   66.51 +        u32 period, 
   66.52 +        char irq,
   66.53 +        char one_shot)
   66.54 +{
   66.55 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   66.56 +    if ( pt->enabled ) {
   66.57 +        if ( v->vcpu_id != 0 ) {
   66.58 +            printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
   66.59 +        }
   66.60 +        stop_timer (&pt->timer);
   66.61 +        pt->enabled = 0;
   66.62 +    }
   66.63 +    pt->pending_intr_nr = 0;
   66.64 +    pt->first_injected = 0;
   66.65 +    if (period < 900000) { /* < 0.9 ms */
   66.66 +        printk("HVM_PlatformTime: program too small period %u\n",period);
   66.67 +        period = 900000;   /* force to 0.9ms */
   66.68 +    }
   66.69 +    pt->period = period;
   66.70 +    pt->irq = irq;
   66.71 +    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
   66.72 +    pt->one_shot = one_shot;
   66.73 +    if ( one_shot ) {
   66.74 +        printk("HVM_PL: No support for one shot platform time yet\n");
   66.75 +    }
   66.76 +    pt->scheduled = NOW() + period;
   66.77 +    set_timer (&pt->timer,pt->scheduled);
   66.78 +    pt->enabled = 1;
   66.79 +    return pt;
   66.80 +}
   66.81 +
   66.82 +void destroy_periodic_time(struct periodic_time *pt)
   66.83 +{
   66.84 +    if ( pt->enabled ) {
   66.85 +        stop_timer(&pt->timer);
   66.86 +        pt->enabled = 0;
   66.87 +    }
   66.88 +}
   66.89  
   66.90  /*
   66.91   * Local variables:
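
create_periodic_time() and missed_ticks() above replace the per-mode PIT timer arithmetic with one platform timer that tracks how many ticks the guest is owed. The catch-up computation, worked through standalone (times in ns, as with Xen's NOW(); the numbers are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t period    = 10000000;    /* 10ms tick */
        int64_t scheduled = 1000000000;  /* tick was due at t = 1s */
        int64_t now       = 1034000000;  /* handler ran 34ms late */

        int64_t missed = (now - scheduled) / period;  /* 3 full periods */
        missed++;                                     /* plus the due tick */

        /* pending_intr_nr grows by 4 and the next expiry lands on the
         * first period boundary at or after 'now'. */
        printf("owe %lld ticks, next expiry at %lld\n",
               (long long)missed,
               (long long)(scheduled + missed * period)); /* 1040000000 */
        return 0;
    }
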
    67.1 --- a/xen/arch/x86/hvm/svm/intr.c	Thu May 25 15:59:18 2006 -0600
    67.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Fri May 26 13:41:49 2006 -0600
    67.3 @@ -44,45 +44,33 @@
    67.4   */
    67.5  #define BSP_CPU(v)    (!(v->vcpu_id))
    67.6  
    67.7 -u64 svm_get_guest_time(struct vcpu *v)
    67.8 -{
    67.9 -    struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   67.10 -    u64    host_tsc;
   67.11 -    
   67.12 -    rdtscll(host_tsc);
   67.13 -    return host_tsc + time_info->cache_tsc_offset;
   67.14 -}
   67.15 -
   67.16  void svm_set_guest_time(struct vcpu *v, u64 gtime)
   67.17  {
   67.18 -    struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   67.19      u64    host_tsc;
   67.20     
   67.21      rdtscll(host_tsc);
   67.22      
   67.23 -    time_info->cache_tsc_offset = gtime - host_tsc;
   67.24 -    v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
   67.25 +    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
   67.26 +    v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
   67.27  }
   67.28  
   67.29  static inline void
   67.30  interrupt_post_injection(struct vcpu * v, int vector, int type)
   67.31  {
   67.32 -    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
   67.33 -    struct hvm_time_info *time_info = &vpit->time_info;
   67.34 +    struct  periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   67.35  
   67.36      if ( is_pit_irq(v, vector, type) ) {
   67.37 -        if ( !time_info->first_injected ) {
   67.38 -            time_info->pending_intr_nr = 0;
   67.39 -            time_info->last_pit_gtime = svm_get_guest_time(v);
   67.40 -            time_info->first_injected = 1;
   67.41 +        if ( !pt->first_injected ) {
   67.42 +            pt->pending_intr_nr = 0;
   67.43 +            pt->last_plt_gtime = hvm_get_guest_time(v);
   67.44 +            pt->scheduled = NOW() + pt->period;
   67.45 +            set_timer(&pt->timer, pt->scheduled);
   67.46 +            pt->first_injected = 1;
   67.47          } else {
   67.48 -            time_info->pending_intr_nr--;
   67.49 +            pt->pending_intr_nr--;
   67.50 +            pt->last_plt_gtime += pt->period_cycles;
   67.51 +            svm_set_guest_time(v, pt->last_plt_gtime);
   67.52          }
   67.53 -        time_info->count_advance = 0;
   67.54 -        time_info->count_point = NOW();
   67.55 -
   67.56 -        time_info->last_pit_gtime += time_info->period_cycles;
   67.57 -        svm_set_guest_time(v, time_info->last_pit_gtime);
   67.58      }
   67.59  
   67.60      switch(type)
   67.61 @@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
   67.62      struct vcpu *v = current;
   67.63      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   67.64      struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
   67.65 -    struct hvm_virpit *vpit = &plat->vpit;
   67.66 -    struct hvm_time_info *time_info = &vpit->time_info;
   67.67 +    struct periodic_time *pt = &plat->pl_time.periodic_tm;
   67.68      struct hvm_virpic *pic= &plat->vpic;
   67.69      int intr_type = VLAPIC_DELIV_MODE_EXT;
   67.70      int intr_vector = -1;
   67.71 @@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
   67.72        if ( cpu_has_pending_irq(v) ) {
   67.73             intr_vector = cpu_get_interrupt(v, &intr_type);
   67.74        }
   67.75 -      else  if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
   67.76 -          pic_set_irq(pic, 0, 0);
   67.77 -          pic_set_irq(pic, 0, 1);
   67.78 +      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
   67.79 +          pic_set_irq(pic, pt->irq, 0);
   67.80 +          pic_set_irq(pic, pt->irq, 1);
   67.81            intr_vector = cpu_get_interrupt(v, &intr_type);
   67.82        }
   67.83      }
   67.84 @@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
   67.85             /* Re-injecting a PIT interrupt? */
   67.86              if (re_injecting && 
   67.87                  is_pit_irq(v, intr_vector, intr_type)) {
   67.88 -                    ++time_info->pending_intr_nr;
   67.89 +                    ++pt->pending_intr_nr;
   67.90              }
   67.91              /* let's inject this interrupt */
   67.92              TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
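
On the injection side, SVM (above) and VMX (below) now share one piece of bookkeeping: the first delivered tick clears the backlog and records the guest clock; every later tick pays down pending_intr_nr and warps the guest TSC forward by exactly one period, keeping guest time and tick count in lockstep. A simplified sketch; the field names mirror the periodic_time uses in these hunks, not Xen's full definition:

    #include <stdint.h>

    struct toy_pt {
        int      first_injected;
        unsigned pending_intr_nr; /* ticks owed to the guest */
        uint64_t last_plt_gtime;  /* guest TSC of the last injected tick */
        uint64_t period_cycles;   /* one period, in guest TSC cycles */
    };

    /* After a tick is injected: returns the guest time the caller
     * should program via set_guest_time(), or 0 for "leave as is". */
    static uint64_t post_injection(struct toy_pt *pt, uint64_t now_gtime)
    {
        if (!pt->first_injected) {
            pt->pending_intr_nr = 0;   /* drop pre-programming backlog */
            pt->last_plt_gtime  = now_gtime;
            pt->first_injected  = 1;
            return 0;
        }
        pt->pending_intr_nr--;
        pt->last_plt_gtime += pt->period_cycles;
        return pt->last_plt_gtime;     /* warp guest TSC to here */
    }
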
    68.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu May 25 15:59:18 2006 -0600
    68.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri May 26 13:41:49 2006 -0600
    68.3 @@ -51,13 +51,6 @@
    68.4  
    68.5  #define SVM_EXTRA_DEBUG
    68.6  
    68.7 -#ifdef TRACE_BUFFER
    68.8 -static unsigned long trace_values[NR_CPUS][4];
    68.9 -#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
   68.10 -#else
   68.11 -#define TRACE_VMEXIT(index,value) ((void)0)
   68.12 -#endif
   68.13 -
   68.14  /* Useful define */
   68.15  #define MAX_INST_SIZE  15
   68.16  
   68.17 @@ -672,12 +665,11 @@ static void arch_svm_do_launch(struct vc
   68.18  
   68.19  static void svm_freeze_time(struct vcpu *v)
   68.20  {
   68.21 -    struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
   68.22 +    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
   68.23      
   68.24 -    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
   68.25 -        v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
   68.26 -        time_info->count_advance += (NOW() - time_info->count_point);
   68.27 -        stop_timer(&(time_info->pit_timer));
   68.28 +    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
   68.29 +        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
   68.30 +        stop_timer(&(pt->timer));
   68.31      }
   68.32  }
   68.33  
   68.34 @@ -754,7 +746,7 @@ static void svm_relinquish_guest_resourc
   68.35          }
   68.36      }
   68.37  
   68.38 -    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
   68.39 +    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
   68.40  
   68.41      if ( d->arch.hvm_domain.shared_page_va )
   68.42          unmap_domain_page_global(
   68.43 @@ -784,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
   68.44  
   68.45  void svm_migrate_timers(struct vcpu *v)
   68.46  {
   68.47 -    struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
   68.48 -
   68.49 -    migrate_timer(&time_info->pit_timer, v->processor);
   68.50 -    migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
   68.51 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   68.52 +
   68.53 +    if ( pt->enabled ) {
   68.54 +        migrate_timer( &pt->timer, v->processor );
   68.55 +        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
   68.56 +    }
   68.57      if ( hvm_apic_support(v->domain) && VLAPIC( v ))
   68.58          migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
   68.59  }
   68.60 @@ -816,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
   68.61              return 1;
   68.62  
   68.63          handle_mmio(va, va);
   68.64 -        TRACE_VMEXIT(2,2);
   68.65          return 1;
   68.66      }
   68.67  
   68.68 @@ -842,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
   68.69              return 1;
   68.70          }
   68.71  
   68.72 -        TRACE_VMEXIT (2,2);
   68.73          handle_mmio(va, gpa);
   68.74  
   68.75          return 1;
   68.76 @@ -855,8 +847,6 @@ static int svm_do_page_fault(unsigned lo
   68.77          set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
   68.78      }
   68.79  
   68.80 -    TRACE_VMEXIT (2,result);
   68.81 -
   68.82      return result;
   68.83  }
   68.84  
   68.85 @@ -1901,14 +1891,8 @@ static inline void svm_do_msr_access(str
   68.86          regs->edx = 0;
   68.87          switch (regs->ecx) {
   68.88          case MSR_IA32_TIME_STAMP_COUNTER:
   68.89 -        {
   68.90 -            struct hvm_time_info *time_info;
   68.91 -
   68.92 -            rdtscll(msr_content);
   68.93 -            time_info = &v->domain->arch.hvm_domain.vpit.time_info;
   68.94 -            msr_content += time_info->cache_tsc_offset;
   68.95 +            msr_content = hvm_get_guest_time(v);
   68.96              break;
   68.97 -        }
   68.98          case MSR_IA32_SYSENTER_CS:
   68.99              msr_content = vmcb->sysenter_cs;
  68.100              break;
  68.101 @@ -1975,7 +1959,7 @@ done:
  68.102  static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
  68.103  {
  68.104      struct vcpu *v = current;
  68.105 -    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
  68.106 +    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
  68.107      s_time_t  next_pit = -1, next_wakeup;
  68.108  
  68.109      __update_guest_eip(vmcb, 1);
  68.110 @@ -1985,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
  68.111         return; 
  68.112  
  68.113      if ( !v->vcpu_id )
  68.114 -        next_pit = get_pit_scheduled(v, vpit);
  68.115 +        next_pit = get_scheduled(v, pt->irq, pt);
  68.116      next_wakeup = get_apictime_scheduled(v);
  68.117      if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
  68.118          next_wakeup = next_pit;
    69.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Thu May 25 15:59:18 2006 -0600
    69.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Fri May 26 13:41:49 2006 -0600
    69.3 @@ -442,19 +442,17 @@ void set_hsa_to_guest( struct arch_svm_s
    69.4  void svm_do_resume(struct vcpu *v) 
    69.5  {
    69.6      struct domain *d = v->domain;
    69.7 -    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
    69.8 -    struct hvm_time_info *time_info = &vpit->time_info;
    69.9 +    struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
   69.10  
   69.11      svm_stts(v);
   69.12  
   69.13      /* pick up the elapsed PIT ticks and re-enable pit_timer */
   69.14 -    if ( time_info->first_injected ) {
   69.15 -        if ( v->domain->arch.hvm_domain.guest_time ) {
   69.16 -            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
   69.17 -            time_info->count_point = NOW();
   69.18 -            v->domain->arch.hvm_domain.guest_time = 0;
   69.19 +    if ( pt->enabled && pt->first_injected ) {
   69.20 +        if ( v->arch.hvm_vcpu.guest_time ) {
   69.21 +            svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
   69.22 +            v->arch.hvm_vcpu.guest_time = 0;
   69.23          }
   69.24 -        pickup_deactive_ticks(vpit);
   69.25 +        pickup_deactive_ticks(pt);
   69.26      }
   69.27  
   69.28      if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
    70.1 --- a/xen/arch/x86/hvm/vmx/io.c	Thu May 25 15:59:18 2006 -0600
    70.2 +++ b/xen/arch/x86/hvm/vmx/io.c	Fri May 26 13:41:49 2006 -0600
    70.3 @@ -49,45 +49,33 @@ void __set_tsc_offset(u64  offset)
    70.4  #endif
    70.5  }
    70.6  
    70.7 -u64 get_guest_time(struct vcpu *v)
    70.8 -{
    70.9 -    struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   70.10 -    u64    host_tsc;
   70.11 -    
   70.12 -    rdtscll(host_tsc);
   70.13 -    return host_tsc + time_info->cache_tsc_offset;
   70.14 -}
   70.15 -
   70.16  void set_guest_time(struct vcpu *v, u64 gtime)
   70.17  {
   70.18 -    struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   70.19      u64    host_tsc;
   70.20     
   70.21      rdtscll(host_tsc);
   70.22      
   70.23 -    time_info->cache_tsc_offset = gtime - host_tsc;
   70.24 -    __set_tsc_offset(time_info->cache_tsc_offset);
   70.25 +    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
   70.26 +    __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
   70.27  }
   70.28  
   70.29  static inline void
   70.30  interrupt_post_injection(struct vcpu * v, int vector, int type)
   70.31  {
   70.32 -    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
   70.33 -    struct hvm_time_info *time_info = &vpit->time_info;
   70.34 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   70.35  
   70.36      if ( is_pit_irq(v, vector, type) ) {
   70.37 -        if ( !time_info->first_injected ) {
   70.38 -            time_info->pending_intr_nr = 0;
   70.39 -            time_info->last_pit_gtime = get_guest_time(v);
   70.40 -            time_info->first_injected = 1;
   70.41 +        if ( !pt->first_injected ) {
   70.42 +            pt->pending_intr_nr = 0;
   70.43 +            pt->last_plt_gtime = hvm_get_guest_time(v);
   70.44 +            pt->scheduled = NOW() + pt->period;
   70.45 +            set_timer(&pt->timer, pt->scheduled);
   70.46 +            pt->first_injected = 1;
   70.47          } else {
   70.48 -            time_info->pending_intr_nr--;
   70.49 +            pt->pending_intr_nr--;
   70.50 +            pt->last_plt_gtime += pt->period_cycles;
   70.51 +            set_guest_time(v, pt->last_plt_gtime);
   70.52          }
   70.53 -        time_info->count_advance = 0;
   70.54 -        time_info->count_point = NOW();
   70.55 -
   70.56 -        time_info->last_pit_gtime += time_info->period_cycles;
   70.57 -        set_guest_time(v, time_info->last_pit_gtime);
   70.58      }
   70.59  
   70.60      switch(type)
   70.61 @@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
   70.62      unsigned long eflags;
   70.63      struct vcpu *v = current;
   70.64      struct hvm_domain *plat=&v->domain->arch.hvm_domain;
   70.65 -    struct hvm_time_info *time_info = &plat->vpit.time_info;
   70.66 +    struct periodic_time *pt = &plat->pl_time.periodic_tm;
   70.67      struct hvm_virpic *pic= &plat->vpic;
   70.68      unsigned int idtv_info_field;
   70.69      unsigned long inst_len;
   70.70 @@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
   70.71      if ( v->vcpu_id == 0 )
   70.72          hvm_pic_assist(v);
   70.73  
   70.74 -    if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
   70.75 -        pic_set_irq(pic, 0, 0);
   70.76 -        pic_set_irq(pic, 0, 1);
   70.77 +    if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
   70.78 +        pic_set_irq(pic, pt->irq, 0);
   70.79 +        pic_set_irq(pic, pt->irq, 1);
   70.80      }
   70.81  
   70.82      has_ext_irq = cpu_has_pending_irq(v);
   70.83 @@ -232,19 +220,17 @@ asmlinkage void vmx_intr_assist(void)
   70.84  void vmx_do_resume(struct vcpu *v)
   70.85  {
   70.86      struct domain *d = v->domain;
   70.87 -    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
   70.88 -    struct hvm_time_info *time_info = &vpit->time_info;
   70.89 +    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
   70.90  
   70.91      vmx_stts();
   70.92  
   70.93      /* pick up the elapsed PIT ticks and re-enable pit_timer */
   70.94 -    if ( time_info->first_injected ) {
   70.95 -        if ( v->domain->arch.hvm_domain.guest_time ) {
   70.96 -            time_info->count_point = NOW();
   70.97 -            set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
   70.98 -            v->domain->arch.hvm_domain.guest_time = 0;
   70.99 +    if ( pt->enabled && pt->first_injected ) {
  70.100 +        if ( v->arch.hvm_vcpu.guest_time ) {
  70.101 +            set_guest_time(v, v->arch.hvm_vcpu.guest_time);
  70.102 +            v->arch.hvm_vcpu.guest_time = 0;
  70.103          }
  70.104 -        pickup_deactive_ticks(vpit);
  70.105 +        pickup_deactive_ticks(pt);
  70.106      }
  70.107  
  70.108      if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
    71.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu May 25 15:59:18 2006 -0600
    71.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri May 26 13:41:49 2006 -0600
    71.3 @@ -47,7 +47,7 @@
    71.4  #include <asm/hvm/vpic.h>
    71.5  #include <asm/hvm/vlapic.h>
    71.6  
    71.7 -static unsigned long trace_values[NR_CPUS][4];
    71.8 +static unsigned long trace_values[NR_CPUS][5];
    71.9  #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
   71.10  
   71.11  static void vmx_ctxt_switch_from(struct vcpu *v);
   71.12 @@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
   71.13          }
   71.14      }
   71.15  
   71.16 -    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
   71.17 +    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
   71.18  
   71.19      if ( d->arch.hvm_domain.shared_page_va )
   71.20          unmap_domain_page_global(
   71.21 @@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
   71.22  
   71.23  static void vmx_freeze_time(struct vcpu *v)
   71.24  {
   71.25 -    struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   71.26 +    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
   71.27      
   71.28 -    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
   71.29 -        v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
   71.30 -        time_info->count_advance += (NOW() - time_info->count_point);
   71.31 -        stop_timer(&(time_info->pit_timer));
   71.32 +    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
   71.33 +        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
   71.34 +        stop_timer(&(pt->timer));
   71.35      }
   71.36  }
   71.37  
   71.38 @@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
   71.39  
   71.40  void vmx_migrate_timers(struct vcpu *v)
   71.41  {
   71.42 -    struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
   71.43 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   71.44  
   71.45 -    migrate_timer(&time_info->pit_timer, v->processor);
   71.46 -    migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
   71.47 +    if ( pt->enabled ) {
   71.48 +        migrate_timer(&pt->timer, v->processor);
   71.49 +        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
   71.50 +    }
   71.51      if ( hvm_apic_support(v->domain) && VLAPIC(v))
   71.52          migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
   71.53  }
   71.54 @@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
   71.55                  (unsigned long)regs->edx);
   71.56      switch (regs->ecx) {
   71.57      case MSR_IA32_TIME_STAMP_COUNTER:
   71.58 -    {
   71.59 -        struct hvm_time_info *time_info;
   71.60 -
   71.61 -        rdtscll(msr_content);
   71.62 -        time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
   71.63 -        msr_content += time_info->cache_tsc_offset;
   71.64 +        msr_content = hvm_get_guest_time(v);
   71.65          break;
   71.66 -    }
   71.67      case MSR_IA32_SYSENTER_CS:
   71.68          __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
   71.69          break;
   71.70 @@ -1941,11 +1936,11 @@ static inline void vmx_do_msr_write(stru
   71.71  void vmx_vmexit_do_hlt(void)
   71.72  {
   71.73      struct vcpu *v=current;
   71.74 -    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
   71.75 +    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
   71.76      s_time_t   next_pit=-1,next_wakeup;
   71.77  
   71.78      if ( !v->vcpu_id )
   71.79 -        next_pit = get_pit_scheduled(v,vpit);
   71.80 +        next_pit = get_scheduled(v, pt->irq, pt);
   71.81      next_wakeup = get_apictime_scheduled(v);
   71.82      if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
   71.83          next_wakeup = next_pit;
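 
The MSR-read hunk in this file replaces the open-coded rdtscll-plus-cache_tsc_offset arithmetic with a single hvm_get_guest_time() call. Presumably the helper encapsulates a computation of the same shape; a sketch under that assumption:
 
    #include <stdint.h>
 
    /* Sketch: guest-visible TSC as host TSC plus a per-domain offset.
     * The real hvm_get_guest_time() is assumed to wrap something similar. */
    static inline uint64_t sketch_guest_tsc(uint64_t host_tsc, int64_t offset)
    {
        return host_tsc + (uint64_t)offset;
    }
 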
    72.1 --- a/xen/arch/x86/mm.c	Thu May 25 15:59:18 2006 -0600
    72.2 +++ b/xen/arch/x86/mm.c	Fri May 26 13:41:49 2006 -0600
    72.3 @@ -260,9 +260,42 @@ void share_xen_page_with_privileged_gues
    72.4      share_xen_page_with_guest(page, dom_xen, readonly);
    72.5  }
    72.6  
    72.7 +static void __write_ptbase(unsigned long mfn)
    72.8 +{
    72.9 +#ifdef CONFIG_X86_PAE
   72.10 +    if ( mfn >= 0x100000 )
   72.11 +    {
   72.12 +        l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
   72.13 +        struct vcpu *v = current;
   72.14 +        unsigned long flags;
   72.15 +
   72.16 +        /* Protects against re-entry and against __pae_flush_pgd(). */
   72.17 +        local_irq_save(flags);
   72.18 +
   72.19 +        /* Pick an unused low-memory L3 cache slot. */
   72.20 +        v->arch.lowmem_l3tab_inuse ^= 1;
   72.21 +        lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse];
   72.22 +        v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn;
   72.23 +
   72.24 +        /* Map the guest L3 table and copy to the chosen low-memory cache. */
   72.25 +        highmem_l3tab = map_domain_page(mfn);
   72.26 +        memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab));
   72.27 +        unmap_domain_page(highmem_l3tab);
   72.28 +
   72.29 +        /* Install the low-memory L3 table in CR3. */
   72.30 +        write_cr3(__pa(lowmem_l3tab));
   72.31 +
   72.32 +        local_irq_restore(flags);
   72.33 +        return;
   72.34 +    }
   72.35 +#endif
   72.36 +
   72.37 +    write_cr3(mfn << PAGE_SHIFT);
   72.38 +}
   72.39 +
   72.40  void write_ptbase(struct vcpu *v)
   72.41  {
   72.42 -    write_cr3(pagetable_get_paddr(v->arch.monitor_table));
   72.43 +    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
   72.44  }
   72.45  
   72.46  void invalidate_shadow_ldt(struct vcpu *v)
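 
__write_ptbase() above keeps two per-vcpu low-memory copies of a PAE L3 table and flips between them, so the copy being rebuilt is never the one CR3 may still reference. A reduced sketch of the two-slot toggle (field names echo the hunk; sizes are illustrative):
 
    /* Sketch: the XOR always selects the slot *not* currently installed
     * in CR3, so the live copy is never clobbered mid-switch. */
    struct l3_cache {
        unsigned long l3tab[2][4];   /* 4 PAE L3 entries per slot */
        unsigned long high_mfn[2];   /* guest mfn each slot shadows */
        int inuse;                   /* 0 or 1 */
    };
 
    static unsigned long *pick_lowmem_slot(struct l3_cache *c, unsigned long mfn)
    {
        c->inuse ^= 1;               /* flip to the vacant slot */
        c->high_mfn[c->inuse] = mfn;
        return c->l3tab[c->inuse];
    }
 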
   72.47 @@ -401,6 +434,7 @@ static int get_page_and_type_from_pagenr
   72.48      return 1;
   72.49  }
   72.50  
   72.51 +#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
   72.52  /*
   72.53   * We allow root tables to map each other (a.k.a. linear page tables). It
   72.54   * needs some special care with reference counts and access permissions:
   72.55 @@ -456,6 +490,7 @@ get_linear_pagetable(
   72.56  
   72.57      return 1;
   72.58  }
   72.59 +#endif /* !CONFIG_X86_PAE */
   72.60  
   72.61  int
   72.62  get_page_from_l1e(
   72.63 @@ -564,10 +599,6 @@ get_page_from_l3e(
   72.64      rc = get_page_and_type_from_pagenr(
   72.65          l3e_get_pfn(l3e),
   72.66          PGT_l2_page_table | vaddr, d);
   72.67 -#if CONFIG_PAGING_LEVELS == 3
   72.68 -    if ( unlikely(!rc) )
   72.69 -        rc = get_linear_pagetable(l3e, pfn, d);
   72.70 -#endif
   72.71      return rc;
   72.72  }
   72.73  #endif /* 3 level */
   72.74 @@ -773,6 +804,50 @@ static int create_pae_xen_mappings(l3_pg
   72.75      return 1;
   72.76  }
   72.77  
   72.78 +struct pae_flush_pgd {
   72.79 +    unsigned long l3tab_mfn;
   72.80 +    unsigned int  l3tab_idx;
   72.81 +    l3_pgentry_t  nl3e;
   72.82 +};
   72.83 +
   72.84 +static void __pae_flush_pgd(void *data)
   72.85 +{
   72.86 +    struct pae_flush_pgd *args = data;
   72.87 +    struct vcpu *v = this_cpu(curr_vcpu);
   72.88 +    int i = v->arch.lowmem_l3tab_inuse;
   72.89 +    intpte_t _ol3e, _nl3e, _pl3e;
   72.90 +    l3_pgentry_t *l3tab_ptr;
   72.91 +
   72.92 +    ASSERT(!local_irq_is_enabled());
   72.93 +
   72.94 +    if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn )
   72.95 +        return;
   72.96 +
   72.97 +    l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx];
   72.98 +
   72.99 +    _ol3e = l3e_get_intpte(*l3tab_ptr);
  72.100 +    _nl3e = l3e_get_intpte(args->nl3e);
  72.101 +    _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
  72.102 +    BUG_ON(_pl3e != _ol3e);
  72.103 +}
  72.104 +
  72.105 +/* Flush a pgdir update into low-memory caches. */
  72.106 +static void pae_flush_pgd(
  72.107 +    unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
  72.108 +{
  72.109 +    struct domain *d = page_get_owner(mfn_to_page(mfn));
  72.110 +    struct pae_flush_pgd args = {
  72.111 +        .l3tab_mfn = mfn,
  72.112 +        .l3tab_idx = idx,
  72.113 +        .nl3e      = nl3e };
  72.114 +
  72.115 +    /* If below 4GB then the pgdir is not shadowed in low memory. */
  72.116 +    if ( mfn < 0x100000 )
  72.117 +        return;
  72.118 +
  72.119 +    on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1);
  72.120 +}
  72.121 +
  72.122  static inline int l1_backptr(
  72.123      unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
  72.124  {
  72.125 @@ -787,6 +862,7 @@ static inline int l1_backptr(
  72.126  
  72.127  #elif CONFIG_X86_64
  72.128  # define create_pae_xen_mappings(pl3e) (1)
  72.129 +# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
  72.130  
  72.131  static inline int l1_backptr(
  72.132      unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
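 
__pae_flush_pgd() lets every CPU that might hold a stale low-memory copy patch its own cached entry; the cmpxchg doubles as a race detector (hence the BUG_ON above). The core of that per-CPU callback, reduced to a standalone sketch (patch_args and patch_local are hypothetical names, and the IPI broadcast itself -- on_selected_cpus() in the hunk -- is assumed):
 
    #include <assert.h>
 
    struct patch_args {
        unsigned long *slot;         /* this CPU's cached entry */
        unsigned long  old_e, new_e;
    };
 
    /* Runs on each targeted CPU. Atomically swing old -> new; the update
     * protocol rules out a concurrent writer, hence the assertion. */
    static void patch_local(void *data)
    {
        struct patch_args *a = data;
        unsigned long prev =
            __sync_val_compare_and_swap(a->slot, a->old_e, a->new_e);
        assert(prev == a->old_e);
    }
 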
  72.133 @@ -886,14 +962,6 @@ static int alloc_l3_table(struct page_in
  72.134  
  72.135      ASSERT(!shadow_mode_refcounts(d));
  72.136  
  72.137 -#ifdef CONFIG_X86_PAE
  72.138 -    if ( pfn >= 0x100000 )
  72.139 -    {
  72.140 -        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
  72.141 -        return 0;
  72.142 -    }
  72.143 -#endif
  72.144 -
  72.145      pl3e = map_domain_page(pfn);
  72.146      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  72.147      {
  72.148 @@ -1241,6 +1309,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
  72.149      okay = create_pae_xen_mappings(pl3e);
  72.150      BUG_ON(!okay);
  72.151  
  72.152 +    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
  72.153 +
  72.154      put_page_from_l3e(ol3e, pfn);
  72.155      return 1;
  72.156  }
  72.157 @@ -3109,7 +3179,7 @@ void ptwr_flush(struct domain *d, const 
  72.158  
  72.159      if ( unlikely(d->arch.ptwr[which].vcpu != current) )
  72.160          /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
  72.161 -        write_cr3(pagetable_get_paddr(
  72.162 +        __write_ptbase(pagetable_get_pfn(
  72.163              d->arch.ptwr[which].vcpu->arch.guest_table));
  72.164      else
  72.165          TOGGLE_MODE();
  72.166 @@ -3220,15 +3290,16 @@ static int ptwr_emulated_update(
  72.167      /* Turn a sub-word access into a full-word access. */
  72.168      if ( bytes != sizeof(paddr_t) )
  72.169      {
  72.170 -        int           rc;
  72.171 -        paddr_t    full;
  72.172 -        unsigned int  offset = addr & (sizeof(paddr_t)-1);
  72.173 +        paddr_t      full;
  72.174 +        unsigned int offset = addr & (sizeof(paddr_t)-1);
  72.175  
  72.176          /* Align address; read full word. */
  72.177          addr &= ~(sizeof(paddr_t)-1);
  72.178 -        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
  72.179 -                                        sizeof(paddr_t))) )
  72.180 -            return rc; 
  72.181 +        if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
  72.182 +        {
  72.183 +            propagate_page_fault(addr, 4); /* user mode, read fault */
  72.184 +            return X86EMUL_PROPAGATE_FAULT;
  72.185 +        }
  72.186          /* Mask out bits provided by caller. */
  72.187          full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
  72.188          /* Shift the caller value and OR in the missing bits. */
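 
The widening path above reads the aligned word back (now via copy_from_user() with explicit fault propagation), clears the bytes the caller is supplying, and ORs the caller's value in at the right offset. The masking arithmetic with concrete, illustrative numbers:
 
    #include <stdint.h>
    #include <stdio.h>
 
    int main(void)
    {
        /* Emulate a 2-byte write of 0xBEEF at byte offset 2 of a word. */
        uint64_t full = 0x1122334455667788ULL;   /* aligned word read back */
        unsigned int bytes = 2, offset = 2;
        uint64_t val = 0xBEEF;
 
        full &= ~((((uint64_t)1 << (bytes * 8)) - 1) << (offset * 8));
        full |= val << (offset * 8);
 
        printf("%#llx\n", (unsigned long long)full); /* 0x11223344beef7788 */
        return 0;
    }
 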
  72.189 @@ -3306,7 +3377,8 @@ static int ptwr_emulated_update(
  72.190  static int ptwr_emulated_write(
  72.191      unsigned long addr,
  72.192      unsigned long val,
  72.193 -    unsigned int bytes)
  72.194 +    unsigned int bytes,
  72.195 +    struct x86_emulate_ctxt *ctxt)
  72.196  {
  72.197      return ptwr_emulated_update(addr, 0, val, bytes, 0);
  72.198  }
  72.199 @@ -3315,7 +3387,8 @@ static int ptwr_emulated_cmpxchg(
  72.200      unsigned long addr,
  72.201      unsigned long old,
  72.202      unsigned long new,
  72.203 -    unsigned int bytes)
  72.204 +    unsigned int bytes,
  72.205 +    struct x86_emulate_ctxt *ctxt)
  72.206  {
  72.207      return ptwr_emulated_update(addr, old, new, bytes, 1);
  72.208  }
  72.209 @@ -3325,7 +3398,8 @@ static int ptwr_emulated_cmpxchg8b(
  72.210      unsigned long old,
  72.211      unsigned long old_hi,
  72.212      unsigned long new,
  72.213 -    unsigned long new_hi)
  72.214 +    unsigned long new_hi,
  72.215 +    struct x86_emulate_ctxt *ctxt)
  72.216  {
  72.217      if ( CONFIG_PAGING_LEVELS == 2 )
  72.218          return X86EMUL_UNHANDLEABLE;
  72.219 @@ -3334,7 +3408,7 @@ static int ptwr_emulated_cmpxchg8b(
  72.220              addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
  72.221  }
  72.222  
  72.223 -static struct x86_mem_emulator ptwr_mem_emulator = {
  72.224 +static struct x86_emulate_ops ptwr_emulate_ops = {
  72.225      .read_std           = x86_emulate_read_std,
  72.226      .write_std          = x86_emulate_write_std,
  72.227      .read_emulated      = x86_emulate_read_std,
  72.228 @@ -3353,6 +3427,7 @@ int ptwr_do_page_fault(struct domain *d,
  72.229      l2_pgentry_t    *pl2e, l2e;
  72.230      int              which, flags;
  72.231      unsigned long    l2_idx;
  72.232 +    struct x86_emulate_ctxt emul_ctxt;
  72.233  
  72.234      if ( unlikely(shadow_mode_enabled(d)) )
  72.235          return 0;
  72.236 @@ -3507,8 +3582,10 @@ int ptwr_do_page_fault(struct domain *d,
  72.237      return EXCRET_fault_fixed;
  72.238  
  72.239   emulate:
  72.240 -    if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
  72.241 -                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
  72.242 +    emul_ctxt.regs = guest_cpu_user_regs();
  72.243 +    emul_ctxt.cr2  = addr;
  72.244 +    emul_ctxt.mode = X86EMUL_MODE_HOST;
  72.245 +    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
  72.246          return 0;
  72.247      perfc_incrc(ptwr_emulations);
  72.248      return EXCRET_fault_fixed;
    73.1 --- a/xen/arch/x86/traps.c	Thu May 25 15:59:18 2006 -0600
    73.2 +++ b/xen/arch/x86/traps.c	Fri May 26 13:41:49 2006 -0600
    73.3 @@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
    73.4                      PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
    73.5                  break;
    73.6              }
    73.7 -            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
    73.8 +            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
    73.9              break;
   73.10  
   73.11          case 0x6e: /* OUTSB */
   73.12 @@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
   73.13                  outl_user((u32)data, (u16)regs->edx, v, regs);
   73.14                  break;
   73.15              }
   73.16 -            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
   73.17 +            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
   73.18              break;
   73.19          }
   73.20  
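Both fixes in this file cast op_bytes to int before negating it. op_bytes is unsigned, so -op_bytes is a large positive value; on a 64-bit build, where regs->esi/edi are 64-bit, that value is zero-extended and the register moves the wrong way when the direction flag is set. A standalone demonstration on an LP64 target:
 
    #include <stdio.h>
 
    int main(void)
    {
        unsigned int op_bytes = 4;
        unsigned long edi = 0x1000;
 
        unsigned long wrong = edi + -op_bytes;       /* adds 0xFFFFFFFC, zero-extended */
        unsigned long right = edi + -(int)op_bytes;  /* adds -4, sign-extended */
 
        printf("wrong: %#lx\n", wrong);              /* 0x100000ffc */
        printf("right: %#lx\n", right);              /* 0xffc       */
        return 0;
    }
 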
    74.1 --- a/xen/arch/x86/x86_emulate.c	Thu May 25 15:59:18 2006 -0600
    74.2 +++ b/xen/arch/x86/x86_emulate.c	Fri May 26 13:41:49 2006 -0600
    74.3 @@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (              
    74.4  #endif /* __i386__ */
    74.5  
    74.6  /* Fetch next part of the instruction being emulated. */
    74.7 -#define insn_fetch(_type, _size, _eip) \
    74.8 -({ unsigned long _x; \
    74.9 -   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
   74.10 -       goto done; \
   74.11 -   (_eip) += (_size); \
   74.12 -   (_type)_x; \
   74.13 +#define insn_fetch(_type, _size, _eip)                                  \
   74.14 +({ unsigned long _x;                                                    \
   74.15 +   rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt);       \
   74.16 +   if ( rc != 0 )                                                       \
   74.17 +       goto done;                                                       \
   74.18 +   (_eip) += (_size);                                                   \
   74.19 +   (_type)_x;                                                           \
   74.20  })
   74.21  
   74.22  /* Access/update address held in a register, based on addressing mode. */
   74.23 @@ -426,12 +427,10 @@ decode_register(
   74.24      return p;
   74.25  }
   74.26  
   74.27 -int 
   74.28 +int
   74.29  x86_emulate_memop(
   74.30 -    struct cpu_user_regs *regs,
   74.31 -    unsigned long cr2,
   74.32 -    struct x86_mem_emulator *ops,
   74.33 -    int mode)
   74.34 +    struct x86_emulate_ctxt *ctxt,
   74.35 +    struct x86_emulate_ops  *ops)
   74.36  {
   74.37      uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
   74.38      uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
   74.39 @@ -439,9 +438,11 @@ x86_emulate_memop(
   74.40      unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
   74.41      int rc = 0;
   74.42      struct operand src, dst;
   74.43 +    unsigned long cr2 = ctxt->cr2;
   74.44 +    int mode = ctxt->mode;
   74.45  
   74.46      /* Shadow copy of register state. Committed on successful emulation. */
   74.47 -    struct cpu_user_regs _regs = *regs;
   74.48 +    struct cpu_user_regs _regs = *ctxt->regs;
   74.49  
   74.50      switch ( mode )
   74.51      {
   74.52 @@ -628,7 +629,7 @@ x86_emulate_memop(
   74.53          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   74.54          if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
   74.55               ((rc = ops->read_emulated((unsigned long)dst.ptr,
   74.56 -                                       &dst.val, dst.bytes)) != 0) )
   74.57 +                                       &dst.val, dst.bytes, ctxt)) != 0) )
   74.58               goto done;
   74.59          break;
   74.60      }
   74.61 @@ -670,7 +671,7 @@ x86_emulate_memop(
   74.62          src.type  = OP_MEM;
   74.63          src.ptr   = (unsigned long *)cr2;
   74.64          if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
   74.65 -                                      &src.val, src.bytes)) != 0 )
   74.66 +                                      &src.val, src.bytes, ctxt)) != 0 )
   74.67              goto done;
   74.68          src.orig_val = src.val;
   74.69          break;
   74.70 @@ -776,7 +777,7 @@ x86_emulate_memop(
   74.71          if ( mode == X86EMUL_MODE_PROT64 )
   74.72              dst.bytes = 8;
   74.73          if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
   74.74 -                                 &dst.val, dst.bytes)) != 0 )
   74.75 +                                 &dst.val, dst.bytes, ctxt)) != 0 )
   74.76              goto done;
   74.77          register_address_increment(_regs.esp, dst.bytes);
   74.78          break;
   74.79 @@ -854,12 +855,12 @@ x86_emulate_memop(
   74.80              {
   74.81                  dst.bytes = 8;
   74.82                  if ( (rc = ops->read_std((unsigned long)dst.ptr,
   74.83 -                                         &dst.val, 8)) != 0 )
   74.84 +                                         &dst.val, 8, ctxt)) != 0 )
   74.85                      goto done;
   74.86              }
   74.87 -            register_address_increment(_regs.esp, -dst.bytes);
   74.88 +            register_address_increment(_regs.esp, -(int)dst.bytes);
   74.89              if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
   74.90 -                                      dst.val, dst.bytes)) != 0 )
   74.91 +                                      dst.val, dst.bytes, ctxt)) != 0 )
   74.92                  goto done;
   74.93              dst.val = dst.orig_val; /* skanky: disable writeback */
   74.94              break;
   74.95 @@ -887,10 +888,11 @@ x86_emulate_memop(
   74.96          case OP_MEM:
   74.97              if ( lock_prefix )
   74.98                  rc = ops->cmpxchg_emulated(
   74.99 -                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
  74.100 +                    (unsigned long)dst.ptr, dst.orig_val,
  74.101 +                    dst.val, dst.bytes, ctxt);
  74.102              else
  74.103                  rc = ops->write_emulated(
  74.104 -                    (unsigned long)dst.ptr, dst.val, dst.bytes);
  74.105 +                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
  74.106              if ( rc != 0 )
  74.107                  goto done;
  74.108          default:
  74.109 @@ -899,7 +901,7 @@ x86_emulate_memop(
  74.110      }
  74.111  
  74.112      /* Commit shadow register state. */
  74.113 -    *regs = _regs;
  74.114 +    *ctxt->regs = _regs;
  74.115  
  74.116   done:
  74.117      return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
  74.118 @@ -911,11 +913,11 @@ x86_emulate_memop(
  74.119      {
  74.120          if ( _regs.ecx == 0 )
  74.121          {
  74.122 -            regs->eip = _regs.eip;
  74.123 +            ctxt->regs->eip = _regs.eip;
  74.124              goto done;
  74.125          }
  74.126          _regs.ecx--;
  74.127 -        _regs.eip = regs->eip;
  74.128 +        _regs.eip = ctxt->regs->eip;
  74.129      }
  74.130      switch ( b )
  74.131      {
  74.132 @@ -928,20 +930,21 @@ x86_emulate_memop(
  74.133              dst.ptr = (unsigned long *)cr2;
  74.134              if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
  74.135                                                        _regs.esi),
  74.136 -                                     &dst.val, dst.bytes)) != 0 )
  74.137 +                                     &dst.val, dst.bytes, ctxt)) != 0 )
  74.138                  goto done;
  74.139          }
  74.140          else
  74.141          {
  74.142              /* Read fault: source is special memory. */
  74.143              dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
  74.144 -            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
  74.145 +            if ( (rc = ops->read_emulated(cr2, &dst.val,
  74.146 +                                          dst.bytes, ctxt)) != 0 )
  74.147                  goto done;
  74.148          }
  74.149          register_address_increment(
  74.150 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
  74.151 +            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
  74.152          register_address_increment(
  74.153 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
  74.154 +            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
  74.155          break;
  74.156      case 0xa6 ... 0xa7: /* cmps */
  74.157          DPRINTF("Urk! I don't handle CMPS.\n");
  74.158 @@ -952,16 +955,16 @@ x86_emulate_memop(
  74.159          dst.ptr   = (unsigned long *)cr2;
  74.160          dst.val   = _regs.eax;
  74.161          register_address_increment(
  74.162 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
  74.163 +            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
  74.164          break;
  74.165      case 0xac ... 0xad: /* lods */
  74.166          dst.type  = OP_REG;
  74.167          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  74.168          dst.ptr   = (unsigned long *)&_regs.eax;
  74.169 -        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
  74.170 +        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
  74.171              goto done;
  74.172          register_address_increment(
  74.173 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
  74.174 +            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
  74.175          break;
  74.176      case 0xae ... 0xaf: /* scas */
  74.177          DPRINTF("Urk! I don't handle SCAS.\n");
  74.178 @@ -1074,8 +1077,8 @@ x86_emulate_memop(
  74.179  #if defined(__i386__)
  74.180      {
  74.181          unsigned long old_lo, old_hi;
  74.182 -        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
  74.183 -             ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
  74.184 +        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
  74.185 +             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
  74.186              goto done;
  74.187          if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
  74.188          {
  74.189 @@ -1090,8 +1093,8 @@ x86_emulate_memop(
  74.190          }
  74.191          else
  74.192          {
  74.193 -            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
  74.194 -                                               _regs.ebx, _regs.ecx)) != 0 )
  74.195 +            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
  74.196 +                                               _regs.ecx, ctxt)) != 0 )
  74.197                  goto done;
  74.198              _regs.eflags |= EFLG_ZF;
  74.199          }
  74.200 @@ -1100,7 +1103,7 @@ x86_emulate_memop(
  74.201  #elif defined(__x86_64__)
  74.202      {
  74.203          unsigned long old, new;
  74.204 -        if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
  74.205 +        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
  74.206              goto done;
  74.207          if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
  74.208               ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
  74.209 @@ -1112,7 +1115,7 @@ x86_emulate_memop(
  74.210          else
  74.211          {
  74.212              new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
  74.213 -            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
  74.214 +            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
  74.215                  goto done;
  74.216              _regs.eflags |= EFLG_ZF;
  74.217          }
  74.218 @@ -1136,7 +1139,8 @@ int
  74.219  x86_emulate_read_std(
  74.220      unsigned long addr,
  74.221      unsigned long *val,
  74.222 -    unsigned int bytes)
  74.223 +    unsigned int bytes,
  74.224 +    struct x86_emulate_ctxt *ctxt)
  74.225  {
  74.226      *val = 0;
  74.227      if ( copy_from_user((void *)val, (void *)addr, bytes) )
  74.228 @@ -1151,7 +1155,8 @@ int
  74.229  x86_emulate_write_std(
  74.230      unsigned long addr,
  74.231      unsigned long val,
  74.232 -    unsigned int bytes)
  74.233 +    unsigned int bytes,
  74.234 +    struct x86_emulate_ctxt *ctxt)
  74.235  {
  74.236      if ( copy_to_user((void *)addr, (void *)&val, bytes) )
  74.237      {
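 
The recurring change through x86_emulate.c threads one x86_emulate_ctxt through every x86_emulate_ops callback instead of passing regs, cr2 and mode as separate arguments. The shape of the refactor, sketched with generic (hypothetical) names:
 
    /* Sketch: bundle per-invocation state in a context every callback
     * receives, instead of widening each signature separately. */
    struct emu_ctxt {
        void         *regs;          /* register file being shadowed */
        unsigned long cr2;           /* faulting address */
        int           mode;          /* execution mode */
    };
 
    struct emu_ops {
        int (*read)(unsigned long addr, unsigned long *val,
                    unsigned int bytes, struct emu_ctxt *ctxt);
        int (*write)(unsigned long addr, unsigned long val,
                     unsigned int bytes, struct emu_ctxt *ctxt);
    };
 
    static int emulate_one(struct emu_ctxt *ctxt, struct emu_ops *ops)
    {
        unsigned long tmp;
        /* Every access passes ctxt along, as insn_fetch() does above. */
        return ops->read(ctxt->cr2, &tmp, sizeof(tmp), ctxt);
    }
 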
    75.1 --- a/xen/common/Makefile	Thu May 25 15:59:18 2006 -0600
    75.2 +++ b/xen/common/Makefile	Fri May 26 13:41:49 2006 -0600
    75.3 @@ -13,6 +13,7 @@ obj-y += multicall.o
    75.4  obj-y += page_alloc.o
    75.5  obj-y += rangeset.o
    75.6  obj-y += sched_bvt.o
    75.7 +obj-y += sched_credit.o
    75.8  obj-y += sched_sedf.o
    75.9  obj-y += schedule.o
   75.10  obj-y += softirq.o
    76.1 --- a/xen/common/grant_table.c	Thu May 25 15:59:18 2006 -0600
    76.2 +++ b/xen/common/grant_table.c	Fri May 26 13:41:49 2006 -0600
    76.3 @@ -505,15 +505,12 @@ gnttab_setup_table(
    76.4          goto out;
    76.5      }
    76.6  
    76.7 -    if ( op.nr_frames <= NR_GRANT_FRAMES )
    76.8 +    ASSERT(d->grant_table != NULL);
    76.9 +    op.status = GNTST_okay;
   76.10 +    for ( i = 0; i < op.nr_frames; i++ )
   76.11      {
   76.12 -        ASSERT(d->grant_table != NULL);
   76.13 -        op.status = GNTST_okay;
   76.14 -        for ( i = 0; i < op.nr_frames; i++ )
   76.15 -        {
   76.16 -            gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
   76.17 -            (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
   76.18 -        }
   76.19 +        gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
   76.20 +        (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
   76.21      }
   76.22  
   76.23      put_domain(d);
    77.1 --- a/xen/common/kernel.c	Thu May 25 15:59:18 2006 -0600
    77.2 +++ b/xen/common/kernel.c	Fri May 26 13:41:49 2006 -0600
    77.3 @@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
    77.4          switch ( fi.submap_idx )
    77.5          {
    77.6          case 0:
    77.7 -            fi.submap = 0;
    77.8 +            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
    77.9              if ( shadow_mode_translate(current->domain) )
   77.10                  fi.submap |= 
   77.11                      (1U << XENFEAT_writable_page_tables) |
   77.12 -                    (1U << XENFEAT_auto_translated_physmap) |
   77.13 -                    (1U << XENFEAT_pae_pgdir_above_4gb);
   77.14 +                    (1U << XENFEAT_auto_translated_physmap);
   77.15              if ( supervisor_mode_kernel )
   77.16                  fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
   77.17              break;
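 
With this hunk, XENFEAT_pae_pgdir_above_4gb is advertised in submap index 0 unconditionally rather than only for shadow-translate guests. A guest would test the bit roughly like this (the XENFEAT_* value is taken from the public headers; treat it as an assumption here):
 
    #include <stdint.h>
 
    #define XENFEAT_pae_pgdir_above_4gb 4   /* assumed; see public/features.h */
 
    static int pgdir_may_be_above_4gb(uint32_t submap)
    {
        return (submap & (1U << XENFEAT_pae_pgdir_above_4gb)) != 0;
    }
 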
    78.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    78.2 +++ b/xen/common/sched_credit.c	Fri May 26 13:41:49 2006 -0600
    78.3 @@ -0,0 +1,1233 @@
    78.4 +/****************************************************************************
    78.5 + * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
    78.6 + ****************************************************************************
    78.7 + *
     78.8 + *        File: common/sched_credit.c
    78.9 + *      Author: Emmanuel Ackaouy
   78.10 + *
   78.11 + * Description: Credit-based SMP CPU scheduler
   78.12 + */
   78.13 +
   78.14 +#include <xen/config.h>
   78.15 +#include <xen/init.h>
   78.16 +#include <xen/lib.h>
   78.17 +#include <xen/sched.h>
   78.18 +#include <xen/domain.h>
   78.19 +#include <xen/delay.h>
   78.20 +#include <xen/event.h>
   78.21 +#include <xen/time.h>
   78.22 +#include <xen/perfc.h>
   78.23 +#include <xen/sched-if.h>
   78.24 +#include <xen/softirq.h>
   78.25 +#include <asm/atomic.h>
   78.26 +
   78.27 +
   78.28 +/*
   78.29 + * CSCHED_STATS
   78.30 + *
   78.31 + * Manage very basic counters and stats.
   78.32 + *
   78.33 + * Useful for debugging live systems. The stats are displayed
   78.34 + * with runq dumps ('r' on the Xen console).
   78.35 + */
   78.36 +#define CSCHED_STATS
   78.37 +
   78.38 +
   78.39 +/*
   78.40 + * Basic constants
   78.41 + */
   78.42 +#define CSCHED_TICK             10      /* milliseconds */
   78.43 +#define CSCHED_TSLICE           30      /* milliseconds */
   78.44 +#define CSCHED_ACCT_NTICKS      3
   78.45 +#define CSCHED_ACCT_PERIOD      (CSCHED_ACCT_NTICKS * CSCHED_TICK)
   78.46 +#define CSCHED_DEFAULT_WEIGHT   256
   78.47 +
   78.48 +
   78.49 +/*
   78.50 + * Priorities
   78.51 + */
   78.52 +#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
   78.53 +#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
   78.54 +#define CSCHED_PRI_IDLE         -64     /* idle */
   78.55 +#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
   78.56 +
   78.57 +
   78.58 +/*
   78.59 + * Useful macros
   78.60 + */
   78.61 +#define CSCHED_PCPU(_c)     ((struct csched_pcpu *)schedule_data[_c].sched_priv)
   78.62 +#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
   78.63 +#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
   78.64 +#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
   78.65 +
   78.66 +
   78.67 +/*
   78.68 + * Stats
   78.69 + */
   78.70 +#ifdef CSCHED_STATS
   78.71 +
   78.72 +#define CSCHED_STAT(_X)         (csched_priv.stats._X)
   78.73 +#define CSCHED_STAT_DEFINE(_X)  uint32_t _X;
   78.74 +#define CSCHED_STAT_PRINTK(_X)                                  \
   78.75 +    do                                                          \
   78.76 +    {                                                           \
   78.77 +        printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X));  \
   78.78 +    } while ( 0 );
   78.79 +
   78.80 +#define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
   78.81 +    _MACRO(vcpu_alloc)                      \
   78.82 +    _MACRO(vcpu_add)                        \
   78.83 +    _MACRO(vcpu_sleep)                      \
   78.84 +    _MACRO(vcpu_wake_running)               \
   78.85 +    _MACRO(vcpu_wake_onrunq)                \
   78.86 +    _MACRO(vcpu_wake_runnable)              \
   78.87 +    _MACRO(vcpu_wake_not_runnable)          \
   78.88 +    _MACRO(dom_free)                        \
   78.89 +    _MACRO(schedule)                        \
   78.90 +    _MACRO(tickle_local_idler)              \
   78.91 +    _MACRO(tickle_local_over)               \
   78.92 +    _MACRO(tickle_local_under)              \
   78.93 +    _MACRO(tickle_local_other)              \
   78.94 +    _MACRO(acct_run)                        \
   78.95 +    _MACRO(acct_no_work)                    \
   78.96 +    _MACRO(acct_balance)                    \
   78.97 +    _MACRO(acct_reorder)                    \
   78.98 +    _MACRO(acct_min_credit)                 \
   78.99 +    _MACRO(acct_vcpu_active)                \
  78.100 +    _MACRO(acct_vcpu_idle)                  \
  78.101 +    _MACRO(acct_vcpu_credit_min)
  78.102 +
  78.103 +#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
  78.104 +    _MACRO(vcpu_migrate)                                \
  78.105 +    _MACRO(load_balance_idle)                           \
  78.106 +    _MACRO(load_balance_over)                           \
  78.107 +    _MACRO(load_balance_other)                          \
  78.108 +    _MACRO(steal_trylock_failed)                        \
  78.109 +    _MACRO(steal_peer_down)                             \
  78.110 +    _MACRO(steal_peer_idle)                             \
  78.111 +    _MACRO(steal_peer_running)                          \
  78.112 +    _MACRO(steal_peer_pinned)                           \
  78.113 +    _MACRO(tickle_idlers_none)                          \
  78.114 +    _MACRO(tickle_idlers_some)
  78.115 +
  78.116 +#ifndef NDEBUG
  78.117 +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
  78.118 +    _MACRO(vcpu_check)
  78.119 +#else
  78.120 +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
  78.121 +#endif
  78.122 +
  78.123 +#define CSCHED_STATS_EXPAND(_MACRO)                 \
  78.124 +    CSCHED_STATS_EXPAND_SCHED(_MACRO)               \
  78.125 +    CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
  78.126 +    CSCHED_STATS_EXPAND_CHECKS(_MACRO)
  78.127 +
  78.128 +#define CSCHED_STATS_RESET()                                        \
  78.129 +    do                                                              \
  78.130 +    {                                                               \
  78.131 +        memset(&csched_priv.stats, 0, sizeof(csched_priv.stats));   \
  78.132 +    } while ( 0 )
  78.133 +
  78.134 +#define CSCHED_STATS_DEFINE()                   \
  78.135 +    struct                                      \
  78.136 +    {                                           \
  78.137 +        CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
  78.138 +    } stats
  78.139 +
  78.140 +#define CSCHED_STATS_PRINTK()                   \
  78.141 +    do                                          \
  78.142 +    {                                           \
  78.143 +        printk("stats:\n");                     \
  78.144 +        CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
  78.145 +    } while ( 0 )
  78.146 +
  78.147 +#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
  78.148 +
  78.149 +#else /* CSCHED_STATS */
  78.150 +
  78.151 +#define CSCHED_STATS_RESET()    do {} while ( 0 )
  78.152 +#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
  78.153 +#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
  78.154 +#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
  78.155 +
  78.156 +#endif /* CSCHED_STATS */
  78.157 +
  78.158 +
  78.159 +/*
  78.160 + * Physical CPU
  78.161 + */
  78.162 +struct csched_pcpu {
  78.163 +    struct list_head runq;
  78.164 +    uint32_t runq_sort_last;
  78.165 +};
  78.166 +
  78.167 +/*
  78.168 + * Virtual CPU
  78.169 + */
  78.170 +struct csched_vcpu {
  78.171 +    struct list_head runq_elem;
  78.172 +    struct list_head active_vcpu_elem;
  78.173 +    struct csched_dom *sdom;
  78.174 +    struct vcpu *vcpu;
  78.175 +    atomic_t credit;
  78.176 +    int credit_last;
  78.177 +    uint32_t credit_incr;
  78.178 +    uint32_t state_active;
  78.179 +    uint32_t state_idle;
  78.180 +    int16_t pri;
  78.181 +};
  78.182 +
  78.183 +/*
  78.184 + * Domain
  78.185 + */
  78.186 +struct csched_dom {
  78.187 +    struct list_head active_vcpu;
  78.188 +    struct list_head active_sdom_elem;
  78.189 +    struct domain *dom;
  78.190 +    uint16_t active_vcpu_count;
  78.191 +    uint16_t weight;
  78.192 +    uint16_t cap;
  78.193 +};
  78.194 +
  78.195 +/*
  78.196 + * System-wide private data
  78.197 + */
  78.198 +struct csched_private {
  78.199 +    spinlock_t lock;
  78.200 +    struct list_head active_sdom;
  78.201 +    uint32_t ncpus;
  78.202 +    unsigned int master;
  78.203 +    cpumask_t idlers;
  78.204 +    uint32_t weight;
  78.205 +    uint32_t credit;
  78.206 +    int credit_balance;
  78.207 +    uint32_t runq_sort;
  78.208 +    CSCHED_STATS_DEFINE();
  78.209 +};
  78.210 +
  78.211 +
  78.212 +/*
  78.213 + * Global variables
  78.214 + */
  78.215 +static struct csched_private csched_priv;
  78.216 +
  78.217 +
  78.218 +
  78.219 +static inline int
  78.220 +__vcpu_on_runq(struct csched_vcpu *svc)
  78.221 +{
  78.222 +    return !list_empty(&svc->runq_elem);
  78.223 +}
  78.224 +
  78.225 +static inline struct csched_vcpu *
  78.226 +__runq_elem(struct list_head *elem)
  78.227 +{
  78.228 +    return list_entry(elem, struct csched_vcpu, runq_elem);
  78.229 +}
  78.230 +
  78.231 +static inline void
  78.232 +__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
  78.233 +{
  78.234 +    const struct list_head * const runq = RUNQ(cpu);
  78.235 +    struct list_head *iter;
  78.236 +
  78.237 +    BUG_ON( __vcpu_on_runq(svc) );
  78.238 +    BUG_ON( cpu != svc->vcpu->processor );
  78.239 +
  78.240 +    list_for_each( iter, runq )
  78.241 +    {
  78.242 +        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
  78.243 +        if ( svc->pri > iter_svc->pri )
  78.244 +            break;
  78.245 +    }
  78.246 +
  78.247 +    list_add_tail(&svc->runq_elem, iter);
  78.248 +}
  78.249 +
  78.250 +static inline void
  78.251 +__runq_remove(struct csched_vcpu *svc)
  78.252 +{
  78.253 +    BUG_ON( !__vcpu_on_runq(svc) );
  78.254 +    list_del_init(&svc->runq_elem);
  78.255 +}
  78.256 +
  78.257 +static inline void
  78.258 +__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
  78.259 +{
  78.260 +    struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
  78.261 +    cpumask_t mask;
  78.262 +
  78.263 +    ASSERT(cur);
  78.264 +    cpus_clear(mask);
  78.265 +
  78.266 +    /* If strictly higher priority than current VCPU, signal the CPU */
  78.267 +    if ( new->pri > cur->pri )
  78.268 +    {
  78.269 +        if ( cur->pri == CSCHED_PRI_IDLE )
  78.270 +            CSCHED_STAT_CRANK(tickle_local_idler);
  78.271 +        else if ( cur->pri == CSCHED_PRI_TS_OVER )
  78.272 +            CSCHED_STAT_CRANK(tickle_local_over);
  78.273 +        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
  78.274 +            CSCHED_STAT_CRANK(tickle_local_under);
  78.275 +        else
  78.276 +            CSCHED_STAT_CRANK(tickle_local_other);
  78.277 +
  78.278 +        cpu_set(cpu, mask);
  78.279 +    }
  78.280 +
  78.281 +    /*
  78.282 +     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
  78.283 +     * let them know there is runnable work in the system...
  78.284 +     */
  78.285 +    if ( cur->pri > CSCHED_PRI_IDLE )
  78.286 +    {
  78.287 +        if ( cpus_empty(csched_priv.idlers) )
  78.288 +        {
  78.289 +            CSCHED_STAT_CRANK(tickle_idlers_none);
  78.290 +        }
  78.291 +        else
  78.292 +        {
  78.293 +            CSCHED_STAT_CRANK(tickle_idlers_some);
  78.294 +            cpus_or(mask, mask, csched_priv.idlers);
  78.295 +        }
  78.296 +    }
  78.297 +
  78.298 +    /* Send scheduler interrupts to designated CPUs */
  78.299 +    if ( !cpus_empty(mask) )
  78.300 +        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
  78.301 +}
  78.302 +
  78.303 +static void
  78.304 +csched_pcpu_init(int cpu)
  78.305 +{
  78.306 +    struct csched_pcpu *spc;
  78.307 +    unsigned long flags;
  78.308 +
  78.309 +    spin_lock_irqsave(&csched_priv.lock, flags);
  78.310 +
  78.311 +    /* Initialize/update system-wide config */
  78.312 +    csched_priv.credit += CSCHED_ACCT_PERIOD;
  78.313 +    if ( csched_priv.ncpus <= cpu )
  78.314 +        csched_priv.ncpus = cpu + 1;
  78.315 +    if ( csched_priv.master >= csched_priv.ncpus )
  78.316 +        csched_priv.master = cpu;
  78.317 +
  78.318 +    /* Allocate per-PCPU info */
  78.319 +    spc = xmalloc(struct csched_pcpu);
  78.320 +    BUG_ON( spc == NULL );
  78.321 +    INIT_LIST_HEAD(&spc->runq);
  78.322 +    spc->runq_sort_last = csched_priv.runq_sort;
  78.323 +    schedule_data[cpu].sched_priv = spc;
  78.324 +
  78.325 +    /* Start off idling... */
  78.326 +    BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
  78.327 +    cpu_set(cpu, csched_priv.idlers);
  78.328 +
  78.329 +    spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.330 +}
  78.331 +
  78.332 +#ifndef NDEBUG
  78.333 +static inline void
  78.334 +__csched_vcpu_check(struct vcpu *vc)
  78.335 +{
  78.336 +    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
  78.337 +    struct csched_dom * const sdom = svc->sdom;
  78.338 +
  78.339 +    BUG_ON( svc->vcpu != vc );
  78.340 +    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
  78.341 +    if ( sdom )
  78.342 +    {
  78.343 +        BUG_ON( is_idle_vcpu(vc) );
  78.344 +        BUG_ON( sdom->dom != vc->domain );
  78.345 +    }
  78.346 +    else
  78.347 +    {
  78.348 +        BUG_ON( !is_idle_vcpu(vc) );
  78.349 +    }
  78.350 +
  78.351 +    CSCHED_STAT_CRANK(vcpu_check);
  78.352 +}
  78.353 +#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
  78.354 +#else
  78.355 +#define CSCHED_VCPU_CHECK(_vc)
  78.356 +#endif
  78.357 +
  78.358 +static inline int
  78.359 +__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
  78.360 +{
  78.361 +    /*
  78.362 +     * Don't pick up work that's in the peer's scheduling tail. Also only pick
  78.363 +     * up work that's allowed to run on our CPU.
  78.364 +     */
  78.365 +    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
  78.366 +    {
  78.367 +        CSCHED_STAT_CRANK(steal_peer_running);
  78.368 +        return 0;
  78.369 +    }
  78.370 +
  78.371 +    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
  78.372 +    {
  78.373 +        CSCHED_STAT_CRANK(steal_peer_pinned);
  78.374 +        return 0;
  78.375 +    }
  78.376 +
  78.377 +    return 1;
  78.378 +}
  78.379 +
  78.380 +static void
  78.381 +csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
  78.382 +{
  78.383 +    struct csched_dom * const sdom = svc->sdom;
  78.384 +    unsigned long flags;
  78.385 +
  78.386 +    /* Update credits */
  78.387 +    atomic_sub(credit_dec, &svc->credit);
  78.388 +
  78.389 +    /* Put this VCPU and domain back on the active list if it was idling */
  78.390 +    if ( list_empty(&svc->active_vcpu_elem) )
  78.391 +    {
  78.392 +        spin_lock_irqsave(&csched_priv.lock, flags);
  78.393 +
  78.394 +        if ( list_empty(&svc->active_vcpu_elem) )
  78.395 +        {
  78.396 +            CSCHED_STAT_CRANK(acct_vcpu_active);
  78.397 +            svc->state_active++;
  78.398 +
  78.399 +            sdom->active_vcpu_count++;
  78.400 +            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
  78.401 +            if ( list_empty(&sdom->active_sdom_elem) )
  78.402 +            {
  78.403 +                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
  78.404 +                csched_priv.weight += sdom->weight;
  78.405 +            }
  78.406 +        }
  78.407 +
  78.408 +        spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.409 +    }
  78.410 +}
  78.411 +
  78.412 +static inline void
  78.413 +__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
  78.414 +{
  78.415 +    struct csched_dom * const sdom = svc->sdom;
  78.416 +
  78.417 +    BUG_ON( list_empty(&svc->active_vcpu_elem) );
  78.418 +
  78.419 +    CSCHED_STAT_CRANK(acct_vcpu_idle);
  78.420 +    svc->state_idle++;
  78.421 +
  78.422 +    sdom->active_vcpu_count--;
  78.423 +    list_del_init(&svc->active_vcpu_elem);
  78.424 +    if ( list_empty(&sdom->active_vcpu) )
  78.425 +    {
  78.426 +        BUG_ON( csched_priv.weight < sdom->weight );
  78.427 +        list_del_init(&sdom->active_sdom_elem);
  78.428 +        csched_priv.weight -= sdom->weight;
  78.429 +    }
  78.430 +
  78.431 +    atomic_set(&svc->credit, 0);
  78.432 +}
  78.433 +
  78.434 +static int
  78.435 +csched_vcpu_alloc(struct vcpu *vc)
  78.436 +{
  78.437 +    struct domain * const dom = vc->domain;
  78.438 +    struct csched_dom *sdom;
  78.439 +    struct csched_vcpu *svc;
  78.440 +    int16_t pri;
  78.441 +
  78.442 +    CSCHED_STAT_CRANK(vcpu_alloc);
  78.443 +
  78.444 +    /* Allocate, if appropriate, per-domain info */
  78.445 +    if ( is_idle_vcpu(vc) )
  78.446 +    {
  78.447 +        sdom = NULL;
  78.448 +        pri = CSCHED_PRI_IDLE;
  78.449 +    }
  78.450 +    else if ( CSCHED_DOM(dom) )
  78.451 +    {
  78.452 +        sdom = CSCHED_DOM(dom);
  78.453 +        pri = CSCHED_PRI_TS_UNDER;
  78.454 +    }
  78.455 +    else 
  78.456 +    {
  78.457 +        sdom = xmalloc(struct csched_dom);
  78.458 +        if ( !sdom )
  78.459 +            return -1;
  78.460 +
  78.461 +        /* Initialize credit and weight */
  78.462 +        INIT_LIST_HEAD(&sdom->active_vcpu);
  78.463 +        sdom->active_vcpu_count = 0;
  78.464 +        INIT_LIST_HEAD(&sdom->active_sdom_elem);
  78.465 +        sdom->dom = dom;
  78.466 +        sdom->weight = CSCHED_DEFAULT_WEIGHT;
  78.467 +        sdom->cap = 0U;
  78.468 +        dom->sched_priv = sdom;
  78.469 +        pri = CSCHED_PRI_TS_UNDER;
  78.470 +    }
  78.471 +
  78.472 +    /* Allocate per-VCPU info */
  78.473 +    svc = xmalloc(struct csched_vcpu);
  78.474 +    if ( !svc )
  78.475 +        return -1;
  78.476 +
  78.477 +    INIT_LIST_HEAD(&svc->runq_elem);
  78.478 +    INIT_LIST_HEAD(&svc->active_vcpu_elem);
  78.479 +    svc->sdom = sdom;
  78.480 +    svc->vcpu = vc;
  78.481 +    atomic_set(&svc->credit, 0);
  78.482 +    svc->credit_last = 0;
  78.483 +    svc->credit_incr = 0U;
  78.484 +    svc->state_active = 0U;
  78.485 +    svc->state_idle = 0U;
  78.486 +    svc->pri = pri;
  78.487 +    vc->sched_priv = svc;
  78.488 +
  78.489 +    CSCHED_VCPU_CHECK(vc);
  78.490 +
  78.491 +    /* Attach fair-share VCPUs to the accounting list */
  78.492 +    if ( likely(sdom != NULL) )
  78.493 +        csched_vcpu_acct(svc, 0);
  78.494 +
  78.495 +    return 0;
  78.496 +}
  78.497 +
  78.498 +static void
  78.499 +csched_vcpu_add(struct vcpu *vc) 
  78.500 +{
  78.501 +    CSCHED_STAT_CRANK(vcpu_add);
  78.502 +
  78.503 +    /* Allocate per-PCPU info */
  78.504 +    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
  78.505 +        csched_pcpu_init(vc->processor);
  78.506 +
  78.507 +    CSCHED_VCPU_CHECK(vc);
  78.508 +}
  78.509 +
  78.510 +static void
  78.511 +csched_vcpu_free(struct vcpu *vc)
  78.512 +{
  78.513 +    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
  78.514 +    struct csched_dom * const sdom = svc->sdom;
  78.515 +    unsigned long flags;
  78.516 +
  78.517 +    BUG_ON( sdom == NULL );
  78.518 +    BUG_ON( !list_empty(&svc->runq_elem) );
  78.519 +
  78.520 +    spin_lock_irqsave(&csched_priv.lock, flags);
  78.521 +
  78.522 +    if ( !list_empty(&svc->active_vcpu_elem) )
  78.523 +        __csched_vcpu_acct_idle_locked(svc);
  78.524 +
  78.525 +    spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.526 +
  78.527 +    xfree(svc);
  78.528 +}
  78.529 +
  78.530 +static void
  78.531 +csched_vcpu_sleep(struct vcpu *vc)
  78.532 +{
  78.533 +    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
  78.534 +
  78.535 +    CSCHED_STAT_CRANK(vcpu_sleep);
  78.536 +
  78.537 +    BUG_ON( is_idle_vcpu(vc) );
  78.538 +
  78.539 +    if ( schedule_data[vc->processor].curr == vc )
  78.540 +        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
  78.541 +    else if ( __vcpu_on_runq(svc) )
  78.542 +        __runq_remove(svc);
  78.543 +}
  78.544 +
  78.545 +static void
  78.546 +csched_vcpu_wake(struct vcpu *vc)
  78.547 +{
  78.548 +    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
  78.549 +    const unsigned int cpu = vc->processor;
  78.550 +
  78.551 +    BUG_ON( is_idle_vcpu(vc) );
  78.552 +
  78.553 +    if ( unlikely(schedule_data[cpu].curr == vc) )
  78.554 +    {
  78.555 +        CSCHED_STAT_CRANK(vcpu_wake_running);
  78.556 +        return;
  78.557 +    }
  78.558 +    if ( unlikely(__vcpu_on_runq(svc)) )
  78.559 +    {
  78.560 +        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
  78.561 +        return;
  78.562 +    }
  78.563 +
  78.564 +    if ( likely(vcpu_runnable(vc)) )
  78.565 +        CSCHED_STAT_CRANK(vcpu_wake_runnable);
  78.566 +    else
  78.567 +        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
  78.568 +
  78.569 +    /* Put the VCPU on the runq and tickle CPUs */
  78.570 +    __runq_insert(cpu, svc);
  78.571 +    __runq_tickle(cpu, svc);
  78.572 +}
  78.573 +
  78.574 +static int
  78.575 +csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
  78.576 +{
  78.577 +    unsigned long flags;
  78.578 +    int lcpu;
  78.579 +
  78.580 +    if ( vc == current )
  78.581 +    {
  78.582 +        /* No locking needed but also can't move on the spot... */
  78.583 +        if ( !cpu_isset(vc->processor, *affinity) )
  78.584 +            return -EBUSY;
  78.585 +
  78.586 +        vc->cpu_affinity = *affinity;
  78.587 +    }
  78.588 +    else
  78.589 +    {
  78.590 +        /* Pause, modify, and unpause. */
  78.591 +        vcpu_pause(vc);
  78.592 +
  78.593 +        vc->cpu_affinity = *affinity;
  78.594 +        if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
  78.595 +        {
  78.596 +            /*
  78.597 +             * We must grab the scheduler lock for the CPU currently owning
  78.598 +             * this VCPU before changing its ownership.
  78.599 +             */
  78.600 +            vcpu_schedule_lock_irqsave(vc, flags);
  78.601 +            lcpu = vc->processor;
  78.602 +
  78.603 +            vc->processor = first_cpu(vc->cpu_affinity);
  78.604 +
  78.605 +            spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
  78.606 +        }
  78.607 +
  78.608 +        vcpu_unpause(vc);
  78.609 +    }
  78.610 +
  78.611 +    return 0;
  78.612 +}
  78.613 +
  78.614 +static int
  78.615 +csched_dom_cntl(
  78.616 +    struct domain *d,
  78.617 +    struct sched_adjdom_cmd *cmd)
  78.618 +{
  78.619 +    struct csched_dom * const sdom = CSCHED_DOM(d);
  78.620 +    unsigned long flags;
  78.621 +
  78.622 +    if ( cmd->direction == SCHED_INFO_GET )
  78.623 +    {
  78.624 +        cmd->u.credit.weight = sdom->weight;
  78.625 +        cmd->u.credit.cap = sdom->cap;
  78.626 +    }
  78.627 +    else
  78.628 +    {
  78.629 +        ASSERT( cmd->direction == SCHED_INFO_PUT );
  78.630 +
  78.631 +        spin_lock_irqsave(&csched_priv.lock, flags);
  78.632 +
  78.633 +        if ( cmd->u.credit.weight != 0 )
  78.634 +        {
  78.635 +            csched_priv.weight -= sdom->weight;
  78.636 +            sdom->weight = cmd->u.credit.weight;
  78.637 +            csched_priv.weight += sdom->weight;
  78.638 +        }
  78.639 +
  78.640 +        if ( cmd->u.credit.cap != (uint16_t)~0U )
  78.641 +            sdom->cap = cmd->u.credit.cap;
  78.642 +
  78.643 +        spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.644 +    }
  78.645 +
  78.646 +    return 0;
  78.647 +}
  78.648 +
  78.649 +static void
  78.650 +csched_dom_free(struct domain *dom)
  78.651 +{
  78.652 +    struct csched_dom * const sdom = CSCHED_DOM(dom);
  78.653 +    int i;
  78.654 +
  78.655 +    CSCHED_STAT_CRANK(dom_free);
  78.656 +
  78.657 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  78.658 +    {
  78.659 +        if ( dom->vcpu[i] )
  78.660 +            csched_vcpu_free(dom->vcpu[i]);
  78.661 +    }
  78.662 +
  78.663 +    xfree(sdom);
  78.664 +}
  78.665 +
  78.666 +/*
   78.667 + * This is an O(n) optimized sort of the runq.
   78.668 + *
   78.669 + * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
   78.670 + * through the runq and move up any UNDERs that are preceded by OVERs. We
   78.671 + * remember the last UNDER to make the move-up operation O(1).
  78.672 + */
  78.673 +static void
  78.674 +csched_runq_sort(unsigned int cpu)
  78.675 +{
  78.676 +    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
  78.677 +    struct list_head *runq, *elem, *next, *last_under;
  78.678 +    struct csched_vcpu *svc_elem;
  78.679 +    unsigned long flags;
  78.680 +    int sort_epoch;
  78.681 +
  78.682 +    sort_epoch = csched_priv.runq_sort;
  78.683 +    if ( sort_epoch == spc->runq_sort_last )
  78.684 +        return;
  78.685 +
  78.686 +    spc->runq_sort_last = sort_epoch;
  78.687 +
  78.688 +    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
  78.689 +
  78.690 +    runq = &spc->runq;
  78.691 +    elem = runq->next;
  78.692 +    last_under = runq;
  78.693 +
  78.694 +    while ( elem != runq )
  78.695 +    {
  78.696 +        next = elem->next;
  78.697 +        svc_elem = __runq_elem(elem);
  78.698 +
  78.699 +        if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
  78.700 +        {
   78.701 +            /* Does elem need to move up the runq? */
  78.702 +            if ( elem->prev != last_under )
  78.703 +            {
  78.704 +                list_del(elem);
  78.705 +                list_add(elem, last_under);
  78.706 +            }
  78.707 +            last_under = elem;
  78.708 +        }
  78.709 +
  78.710 +        elem = next;
  78.711 +    }
  78.712 +
  78.713 +    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
  78.714 +}
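 
    /*
     * Worked illustration (not from the changeset): with U = UNDER and
     * O = OVER, a runq of
     *
     *     U1 O1 U2 O2 U3
     *
     * is rewritten by the pass above to
     *
     *     U1 U2 U3 O1 O2
     *
     * Each UNDER whose predecessor is not last_under is unlinked and
     * re-linked right after last_under, so relative order within each
     * priority class is preserved and the pass stays O(n).
     */
 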
  78.715 +
  78.716 +static void
  78.717 +csched_acct(void)
  78.718 +{
  78.719 +    unsigned long flags;
  78.720 +    struct list_head *iter_vcpu, *next_vcpu;
  78.721 +    struct list_head *iter_sdom, *next_sdom;
  78.722 +    struct csched_vcpu *svc;
  78.723 +    struct csched_dom *sdom;
  78.724 +    uint32_t credit_total;
  78.725 +    uint32_t weight_total;
  78.726 +    uint32_t weight_left;
  78.727 +    uint32_t credit_fair;
  78.728 +    uint32_t credit_peak;
  78.729 +    int credit_balance;
  78.730 +    int credit_xtra;
  78.731 +    int credit;
  78.732 +
  78.733 +
  78.734 +    spin_lock_irqsave(&csched_priv.lock, flags);
  78.735 +
  78.736 +    weight_total = csched_priv.weight;
  78.737 +    credit_total = csched_priv.credit;
  78.738 +
  78.739 +    /* Converge balance towards 0 when it drops negative */
  78.740 +    if ( csched_priv.credit_balance < 0 )
  78.741 +    {
  78.742 +        credit_total -= csched_priv.credit_balance;
  78.743 +        CSCHED_STAT_CRANK(acct_balance);
  78.744 +    }
  78.745 +
  78.746 +    if ( unlikely(weight_total == 0) )
  78.747 +    {
  78.748 +        csched_priv.credit_balance = 0;
  78.749 +        spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.750 +        CSCHED_STAT_CRANK(acct_no_work);
  78.751 +        return;
  78.752 +    }
  78.753 +
  78.754 +    CSCHED_STAT_CRANK(acct_run);
  78.755 +
  78.756 +    weight_left = weight_total;
  78.757 +    credit_balance = 0;
  78.758 +    credit_xtra = 0;
  78.759 +
  78.760 +    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
  78.761 +    {
  78.762 +        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
  78.763 +
  78.764 +        BUG_ON( is_idle_domain(sdom->dom) );
  78.765 +        BUG_ON( sdom->active_vcpu_count == 0 );
  78.766 +        BUG_ON( sdom->weight == 0 );
  78.767 +        BUG_ON( sdom->weight > weight_left );
  78.768 +
  78.769 +        weight_left -= sdom->weight;
  78.770 +
  78.771 +        /*
  78.772 +         * A domain's fair share is computed using its weight in competition
  78.773 +         * with that of all other active domains.
  78.774 +         *
  78.775 +         * At most, a domain can use credits to run all its active VCPUs
  78.776 +         * for one full accounting period. We allow a domain to earn more
  78.777 +         * only when the system-wide credit balance is negative.
  78.778 +         */
  78.779 +        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
  78.780 +        if ( csched_priv.credit_balance < 0 )
  78.781 +        {
  78.782 +            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
  78.783 +                             (weight_total - 1)
  78.784 +                           ) / weight_total;
  78.785 +        }
  78.786 +        if ( sdom->cap != 0U )
  78.787 +        {
  78.788 +            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100;
  78.789 +            if ( credit_cap < credit_peak )
  78.790 +                credit_peak = credit_cap;
  78.791 +        }
  78.792 +
  78.793 +        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
  78.794 +                      ) / weight_total;
  78.795 +
  78.796 +        if ( credit_fair < credit_peak )
  78.797 +        {
  78.798 +            credit_xtra = 1;
  78.799 +        }
  78.800 +        else
  78.801 +        {
  78.802 +            if ( weight_left != 0U )
  78.803 +            {
  78.804 +                /* Give other domains a chance at unused credits */
  78.805 +                credit_total += ( ( ( credit_fair - credit_peak
  78.806 +                                    ) * weight_total
  78.807 +                                  ) + ( weight_left - 1 )
  78.808 +                                ) / weight_left;
  78.809 +            }
  78.810 +
  78.811 +            if ( credit_xtra )
  78.812 +            {
  78.813 +                /*
  78.814 +                 * Lazily keep domains with extra credits at the head of
  78.815 +                 * the queue to give others a chance at them in future
  78.816 +                 * accounting periods.
  78.817 +                 */
  78.818 +                CSCHED_STAT_CRANK(acct_reorder);
  78.819 +                list_del(&sdom->active_sdom_elem);
  78.820 +                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
  78.821 +            }
  78.822 +
  78.823 +            credit_fair = credit_peak;
  78.824 +        }
  78.825 +
  78.826 +        /* Compute fair share per VCPU */
  78.827 +        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
  78.828 +                      ) / sdom->active_vcpu_count;
  78.829 +
  78.830 +
  78.831 +        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
  78.832 +        {
  78.833 +            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
  78.834 +            BUG_ON( sdom != svc->sdom );
  78.835 +
  78.836 +            /* Increment credit */
  78.837 +            atomic_add(credit_fair, &svc->credit);
  78.838 +            credit = atomic_read(&svc->credit);
  78.839 +
  78.840 +            /*
  78.841 +             * Recompute priority or, if VCPU is idling, remove it from
  78.842 +             * the active list.
  78.843 +             */
  78.844 +            if ( credit < 0 )
  78.845 +            {
  78.846 +                if ( sdom->cap == 0U )
  78.847 +                    svc->pri = CSCHED_PRI_TS_OVER;
  78.848 +                else
  78.849 +                    svc->pri = CSCHED_PRI_TS_PARKED;
  78.850 +
  78.851 +                if ( credit < -CSCHED_TSLICE )
  78.852 +                {
  78.853 +                    CSCHED_STAT_CRANK(acct_min_credit);
  78.854 +                    credit = -CSCHED_TSLICE;
  78.855 +                    atomic_set(&svc->credit, credit);
  78.856 +                }
  78.857 +            }
  78.858 +            else
  78.859 +            {
  78.860 +                svc->pri = CSCHED_PRI_TS_UNDER;
  78.861 +
  78.862 +                if ( credit > CSCHED_TSLICE )
  78.863 +                    __csched_vcpu_acct_idle_locked(svc);
  78.864 +            }
  78.865 +
  78.866 +            svc->credit_last = credit;
  78.867 +            svc->credit_incr = credit_fair;
  78.868 +            credit_balance += credit;
  78.869 +        }
  78.870 +    }
  78.871 +
  78.872 +    csched_priv.credit_balance = credit_balance;
  78.873 +
  78.874 +    spin_unlock_irqrestore(&csched_priv.lock, flags);
  78.875 +
  78.876 +    /* Inform each CPU that its runq needs to be sorted */
  78.877 +    csched_priv.runq_sort++;
  78.878 +}
  78.879 +
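The divisions in csched_acct above all use the round-up idiom (a*w + (W-1)) / W, so integer truncation never hands a domain less than its weighted entitlement. A minimal standalone sketch of that arithmetic, with made-up credit and weight values (all names below are stand-ins, not scheduler code):

    #include <stdio.h>
    #include <stdint.h>

    /* Round-up integer division, as used for credit_fair above. */
    static uint32_t div_ceil(uint32_t a, uint32_t b)
    {
        return (a + (b - 1)) / b;
    }

    int main(void)
    {
        uint32_t credit_total = 300;       /* illustrative total credits   */
        uint32_t weight_total = 768;       /* two domains: 256 + 512       */
        uint32_t w[2] = { 256, 512 };

        for (int i = 0; i < 2; i++)
            printf("dom%d fair share = %u\n", i,
                   div_ceil(credit_total * w[i], weight_total));
        return 0;   /* prints 100 and 200 */
    }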
  78.880 +static void
  78.881 +csched_tick(unsigned int cpu)
  78.882 +{
  78.883 +    struct csched_vcpu * const svc = CSCHED_VCPU(current);
  78.884 +    struct csched_dom * const sdom = svc->sdom;
  78.885 +
  78.886 +    /*
  78.887 +     * Accounting for running VCPU
  78.888 +     *
  78.889 +     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
  78.890 +     */
  78.891 +    if ( likely(sdom != NULL) )
  78.892 +    {
  78.893 +        csched_vcpu_acct(svc, CSCHED_TICK);
  78.894 +    }
  78.895 +
  78.896 +    /*
  78.897 +     * Accounting duty
  78.898 +     *
  78.899 +     * Note: Currently, this is always done by the master boot CPU. Eventually,
  78.900 +     * we could distribute or at the very least cycle the duty.
  78.901 +     */
  78.902 +    if ( (csched_priv.master == cpu) &&
  78.903 +         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
  78.904 +    {
  78.905 +        csched_acct();
  78.906 +    }
  78.907 +
  78.908 +    /*
  78.909 +     * Check if runq needs to be sorted
  78.910 +     *
  78.911 +     * Every physical CPU resorts the runq after the accounting master has
  78.912 +     * modified priorities. This is a special O(n) sort and runs at most
  78.913 +     * once per accounting period (currently 30 milliseconds).
  78.914 +     */
  78.915 +    csched_runq_sort(cpu);
  78.916 +}
  78.917 +
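csched_tick drives both per-VCPU accounting and, on one designated CPU, the global credit accounting. Given the 10ms t_timer in the schedule.c hunk below and the 30-millisecond period quoted in the comment above, CSCHED_ACCT_NTICKS presumably equals 3. A hedged sketch of the cadence test (the constants here are assumptions, not quotes):

    #define TICK_MS     10  /* t_timer period, per schedule.c below           */
    #define ACCT_NTICKS  3  /* assumption: 3 ticks x 10ms = 30ms acct period  */

    /* Nonzero when this tick on this CPU should also run csched_acct(). */
    static int acct_due(unsigned int cpu, unsigned int master,
                        unsigned long tick)
    {
        return (cpu == master) && (tick % ACCT_NTICKS) == 0;
    }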
  78.918 +static struct csched_vcpu *
  78.919 +csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
  78.920 +{
  78.921 +    struct list_head *iter;
  78.922 +    struct csched_vcpu *speer;
  78.923 +    struct vcpu *vc;
  78.924 +
  78.925 +    list_for_each( iter, &spc->runq )
  78.926 +    {
  78.927 +        speer = __runq_elem(iter);
  78.928 +
  78.929 +        /*
  78.930 +         * If next available VCPU here is not of higher priority than ours,
  78.931 +         * this PCPU is useless to us.
  78.932 +         */
  78.933 +        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
  78.934 +        {
  78.935 +            CSCHED_STAT_CRANK(steal_peer_idle);
  78.936 +            break;
  78.937 +        }
  78.938 +
   78.939 +        /* Is this VCPU runnable on our PCPU? */
  78.940 +        vc = speer->vcpu;
  78.941 +        BUG_ON( is_idle_vcpu(vc) );
  78.942 +
  78.943 +        if ( __csched_vcpu_is_stealable(cpu, vc) )
  78.944 +        {
  78.945 +            /* We got a candidate. Grab it! */
  78.946 +            __runq_remove(speer);
  78.947 +            vc->processor = cpu;
  78.948 +
  78.949 +            return speer;
  78.950 +        }
  78.951 +    }
  78.952 +
  78.953 +    return NULL;
  78.954 +}
  78.955 +
  78.956 +static struct csched_vcpu *
  78.957 +csched_load_balance(int cpu, struct csched_vcpu *snext)
  78.958 +{
  78.959 +    struct csched_pcpu *spc;
  78.960 +    struct csched_vcpu *speer;
  78.961 +    int peer_cpu;
  78.962 +
  78.963 +    if ( snext->pri == CSCHED_PRI_IDLE )
  78.964 +        CSCHED_STAT_CRANK(load_balance_idle);
  78.965 +    else if ( snext->pri == CSCHED_PRI_TS_OVER )
  78.966 +        CSCHED_STAT_CRANK(load_balance_over);
  78.967 +    else
  78.968 +        CSCHED_STAT_CRANK(load_balance_other);
  78.969 +
  78.970 +    peer_cpu = cpu;
  78.971 +    BUG_ON( peer_cpu != snext->vcpu->processor );
  78.972 +
  78.973 +    while ( 1 )
  78.974 +    {
  78.975 +        /* For each PCPU in the system starting with our neighbour... */
  78.976 +        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
  78.977 +        if ( peer_cpu == cpu )
  78.978 +            break;
  78.979 +
  78.980 +        BUG_ON( peer_cpu >= csched_priv.ncpus );
  78.981 +        BUG_ON( peer_cpu == cpu );
  78.982 +
  78.983 +        /*
  78.984 +         * Get ahold of the scheduler lock for this peer CPU.
  78.985 +         *
  78.986 +         * Note: We don't spin on this lock but simply try it. Spinning could
  78.987 +         * cause a deadlock if the peer CPU is also load balancing and trying
  78.988 +         * to lock this CPU.
  78.989 +         */
  78.990 +        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
  78.991 +        {
  78.993 +            spc = CSCHED_PCPU(peer_cpu);
  78.994 +            if ( unlikely(spc == NULL) )
  78.995 +            {
  78.996 +                CSCHED_STAT_CRANK(steal_peer_down);
  78.997 +                speer = NULL;
  78.998 +            }
  78.999 +            else
 78.1000 +            {
 78.1001 +                speer = csched_runq_steal(spc, cpu, snext->pri);
 78.1002 +            }
 78.1003 +
 78.1004 +            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
 78.1005 +
 78.1006 +            /* Got one! */
 78.1007 +            if ( speer )
 78.1008 +            {
 78.1009 +                CSCHED_STAT_CRANK(vcpu_migrate);
 78.1010 +                return speer;
 78.1011 +            }
 78.1012 +        }
 78.1013 +        else
 78.1014 +        {
 78.1015 +            CSCHED_STAT_CRANK(steal_trylock_failed);
 78.1016 +        }
 78.1017 +    }
 78.1018 +
 78.1020 +    /* Failed to find more important work */
 78.1021 +    __runq_remove(snext);
 78.1022 +    return snext;
 78.1023 +}
 78.1024 +
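The trylock in csched_load_balance is what keeps cross-CPU stealing deadlock-free: each CPU already holds its own schedule_lock, so two CPUs spin-waiting on each other's locks could wait forever, and a failed trylock instead skips the busy peer. A portable sketch of the same pattern using pthreads (the runq type and its contents are hypothetical):

    #include <pthread.h>

    struct runq {
        pthread_mutex_t lock;
        /* ... queued work items ... */
    };

    /* Called while our own runq lock is held; must never block on a peer. */
    static int try_steal_from(struct runq *peer)
    {
        if (pthread_mutex_trylock(&peer->lock) != 0)
            return 0;               /* peer busy: skip it, avoiding deadlock */

        /* ... inspect the peer's queue and maybe dequeue one item ... */

        pthread_mutex_unlock(&peer->lock);
        return 1;
    }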
 78.1025 +/*
 78.1026 + * This function is in the critical path. It is designed to be simple and
 78.1027 + * fast for the common case.
 78.1028 + */
 78.1029 +static struct task_slice
 78.1030 +csched_schedule(s_time_t now)
 78.1031 +{
 78.1032 +    const int cpu = smp_processor_id();
 78.1033 +    struct list_head * const runq = RUNQ(cpu);
 78.1034 +    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
 78.1035 +    struct csched_vcpu *snext;
 78.1036 +    struct task_slice ret;
 78.1037 +
 78.1038 +    CSCHED_STAT_CRANK(schedule);
 78.1039 +    CSCHED_VCPU_CHECK(current);
 78.1040 +
 78.1041 +    /*
 78.1042 +     * Select next runnable local VCPU (ie top of local runq)
 78.1043 +     */
 78.1044 +    if ( vcpu_runnable(current) )
 78.1045 +        __runq_insert(cpu, scurr);
 78.1046 +    else
 78.1047 +        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 78.1048 +
 78.1049 +    snext = __runq_elem(runq->next);
 78.1050 +
 78.1051 +    /*
 78.1052 +     * SMP Load balance:
 78.1053 +     *
  78.1054 +     * If the next highest-priority local runnable VCPU has already eaten
  78.1055 +     * through its credits, look on other PCPUs to see if they have more
  78.1056 +     * urgent work for us. If none is found, csched_load_balance() returns
  78.1057 +     * snext itself, having already removed it from the runq.
 78.1058 +     */
 78.1059 +    if ( snext->pri > CSCHED_PRI_TS_OVER )
 78.1060 +        __runq_remove(snext);
 78.1061 +    else
 78.1062 +        snext = csched_load_balance(cpu, snext);
 78.1063 +
 78.1064 +    /*
 78.1065 +     * Update idlers mask if necessary. When we're idling, other CPUs
 78.1066 +     * will tickle us when they get extra work.
 78.1067 +     */
 78.1068 +    if ( snext->pri == CSCHED_PRI_IDLE )
 78.1069 +    {
 78.1070 +        if ( !cpu_isset(cpu, csched_priv.idlers) )
 78.1071 +            cpu_set(cpu, csched_priv.idlers);
 78.1072 +    }
 78.1073 +    else if ( cpu_isset(cpu, csched_priv.idlers) )
 78.1074 +    {
 78.1075 +        cpu_clear(cpu, csched_priv.idlers);
 78.1076 +    }
 78.1077 +
 78.1078 +    /*
 78.1079 +     * Return task to run next...
 78.1080 +     */
 78.1081 +    ret.time = MILLISECS(CSCHED_TSLICE);
 78.1082 +    ret.task = snext->vcpu;
 78.1083 +
 78.1084 +    CSCHED_VCPU_CHECK(ret.task);
 78.1085 +    BUG_ON( !vcpu_runnable(ret.task) );
 78.1086 +
 78.1087 +    return ret;
 78.1088 +}
 78.1089 +
 78.1090 +static void
 78.1091 +csched_dump_vcpu(struct csched_vcpu *svc)
 78.1092 +{
 78.1093 +    struct csched_dom * const sdom = svc->sdom;
 78.1094 +
 78.1095 +    printk("[%i.%i] pri=%i cpu=%i",
 78.1096 +            svc->vcpu->domain->domain_id,
 78.1097 +            svc->vcpu->vcpu_id,
 78.1098 +            svc->pri,
 78.1099 +            svc->vcpu->processor);
 78.1100 +
 78.1101 +    if ( sdom )
 78.1102 +    {
 78.1103 +        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
 78.1104 +            atomic_read(&svc->credit),
 78.1105 +            svc->credit_last,
 78.1106 +            svc->credit_incr,
 78.1107 +            svc->state_active,
 78.1108 +            svc->state_idle,
 78.1109 +            sdom->weight);
 78.1110 +    }
 78.1111 +
 78.1112 +    printk("\n");
 78.1113 +}
 78.1114 +
 78.1115 +static void
 78.1116 +csched_dump_pcpu(int cpu)
 78.1117 +{
 78.1118 +    struct list_head *runq, *iter;
 78.1119 +    struct csched_pcpu *spc;
 78.1120 +    struct csched_vcpu *svc;
 78.1121 +    int loop;
 78.1122 +
 78.1123 +    spc = CSCHED_PCPU(cpu);
 78.1124 +    runq = &spc->runq;
 78.1125 +
 78.1126 +    printk(" tick=%lu, sort=%d\n",
 78.1127 +            schedule_data[cpu].tick,
 78.1128 +            spc->runq_sort_last);
 78.1129 +
 78.1130 +    /* current VCPU */
 78.1131 +    svc = CSCHED_VCPU(schedule_data[cpu].curr);
 78.1132 +    if ( svc )
 78.1133 +    {
 78.1134 +        printk("\trun: ");
 78.1135 +        csched_dump_vcpu(svc);
 78.1136 +    }
 78.1137 +
 78.1138 +    loop = 0;
 78.1139 +    list_for_each( iter, runq )
 78.1140 +    {
 78.1141 +        svc = __runq_elem(iter);
 78.1142 +        if ( svc )
 78.1143 +        {
 78.1144 +            printk("\t%3d: ", ++loop);
 78.1145 +            csched_dump_vcpu(svc);
 78.1146 +        }
 78.1147 +    }
 78.1148 +}
 78.1149 +
 78.1150 +static void
 78.1151 +csched_dump(void)
 78.1152 +{
 78.1153 +    struct list_head *iter_sdom, *iter_svc;
 78.1154 +    int loop;
 78.1155 +
 78.1156 +    printk("info:\n"
 78.1157 +           "\tncpus              = %u\n"
 78.1158 +           "\tmaster             = %u\n"
 78.1159 +           "\tcredit             = %u\n"
 78.1160 +           "\tcredit balance     = %d\n"
 78.1161 +           "\tweight             = %u\n"
 78.1162 +           "\trunq_sort          = %u\n"
 78.1163 +           "\ttick               = %dms\n"
 78.1164 +           "\ttslice             = %dms\n"
 78.1165 +           "\taccounting period  = %dms\n"
 78.1166 +           "\tdefault-weight     = %d\n",
 78.1167 +           csched_priv.ncpus,
 78.1168 +           csched_priv.master,
 78.1169 +           csched_priv.credit,
 78.1170 +           csched_priv.credit_balance,
 78.1171 +           csched_priv.weight,
 78.1172 +           csched_priv.runq_sort,
 78.1173 +           CSCHED_TICK,
 78.1174 +           CSCHED_TSLICE,
 78.1175 +           CSCHED_ACCT_PERIOD,
 78.1176 +           CSCHED_DEFAULT_WEIGHT);
 78.1177 +
 78.1178 +    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
 78.1179 +
 78.1180 +    CSCHED_STATS_PRINTK();
 78.1181 +
 78.1182 +    printk("active vcpus:\n");
 78.1183 +    loop = 0;
 78.1184 +    list_for_each( iter_sdom, &csched_priv.active_sdom )
 78.1185 +    {
 78.1186 +        struct csched_dom *sdom;
 78.1187 +        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
 78.1188 +
 78.1189 +        list_for_each( iter_svc, &sdom->active_vcpu )
 78.1190 +        {
 78.1191 +            struct csched_vcpu *svc;
 78.1192 +            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
 78.1193 +
 78.1194 +            printk("\t%3d: ", ++loop);
 78.1195 +            csched_dump_vcpu(svc);
 78.1196 +        }
 78.1197 +    }
 78.1198 +}
 78.1199 +
 78.1200 +static void
 78.1201 +csched_init(void)
 78.1202 +{
 78.1203 +    spin_lock_init(&csched_priv.lock);
 78.1204 +    INIT_LIST_HEAD(&csched_priv.active_sdom);
 78.1205 +    csched_priv.ncpus = 0;
 78.1206 +    csched_priv.master = UINT_MAX;
 78.1207 +    cpus_clear(csched_priv.idlers);
 78.1208 +    csched_priv.weight = 0U;
 78.1209 +    csched_priv.credit = 0U;
 78.1210 +    csched_priv.credit_balance = 0;
 78.1211 +    csched_priv.runq_sort = 0U;
 78.1212 +    CSCHED_STATS_RESET();
 78.1213 +}
 78.1214 +
 78.1215 +
 78.1216 +struct scheduler sched_credit_def = {
 78.1217 +    .name           = "SMP Credit Scheduler",
 78.1218 +    .opt_name       = "credit",
 78.1219 +    .sched_id       = SCHED_CREDIT,
 78.1220 +
 78.1221 +    .alloc_task     = csched_vcpu_alloc,
 78.1222 +    .add_task       = csched_vcpu_add,
 78.1223 +    .sleep          = csched_vcpu_sleep,
 78.1224 +    .wake           = csched_vcpu_wake,
 78.1225 +    .set_affinity   = csched_vcpu_set_affinity,
 78.1226 +
 78.1227 +    .adjdom         = csched_dom_cntl,
 78.1228 +    .free_task      = csched_dom_free,
 78.1229 +
 78.1230 +    .tick           = csched_tick,
 78.1231 +    .do_schedule    = csched_schedule,
 78.1232 +
 78.1233 +    .dump_cpu_state = csched_dump_pcpu,
 78.1234 +    .dump_settings  = csched_dump,
 78.1235 +    .init           = csched_init,
 78.1236 +};
    79.1 --- a/xen/common/schedule.c	Thu May 25 15:59:18 2006 -0600
    79.2 +++ b/xen/common/schedule.c	Fri May 26 13:41:49 2006 -0600
    79.3 @@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
    79.4  
    79.5  extern struct scheduler sched_bvt_def;
    79.6  extern struct scheduler sched_sedf_def;
    79.7 +extern struct scheduler sched_credit_def;
    79.8  static struct scheduler *schedulers[] = { 
    79.9      &sched_bvt_def,
   79.10      &sched_sedf_def,
   79.11 +    &sched_credit_def,
   79.12      NULL
   79.13  };
   79.14  
   79.15 @@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
   79.16  
   79.17      page_scrub_schedule_work();
   79.18  
   79.19 +    SCHED_OP(tick, cpu);
   79.20 +
   79.21      set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
   79.22  }
   79.23  
   79.24 @@ -681,6 +685,7 @@ void __init scheduler_init(void)
   79.25          printk("Could not find scheduler: %s\n", opt_sched);
   79.26  
   79.27      printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
   79.28 +    SCHED_OP(init);
   79.29  
   79.30      if ( idle_vcpu[0] != NULL )
   79.31      {
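Both new hooks reach the scheduler through schedule.c's SCHED_OP wrapper, which indirects through the ops table filled in from schedulers[] at boot. A sketch of that style of guarded dispatch; the macro body below illustrates the idea and is not a quote of schedule.c:

    struct scheduler_ops {
        void (*init)(void);
        void (*tick)(unsigned int cpu);
    };

    static struct scheduler_ops ops;    /* copied from the chosen scheduler */

    /* Invoke ops.fn(...) only when the scheduler implements that hook. */
    #define SCHED_OP(fn, ...)           \
        do {                            \
            if (ops.fn != NULL)         \
                ops.fn(__VA_ARGS__);    \
        } while (0)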
    80.1 --- a/xen/common/trace.c	Thu May 25 15:59:18 2006 -0600
    80.2 +++ b/xen/common/trace.c	Fri May 26 13:41:49 2006 -0600
    80.3 @@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
    80.4      if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
    80.5      {
    80.6          printk("Xen trace buffers: memory allocation failed\n");
    80.7 +        opt_tbuf_size = 0;
    80.8          return -EINVAL;
    80.9      }
   80.10  
   80.11 @@ -135,10 +136,7 @@ static int tb_set_size(int size)
   80.12  
   80.13      opt_tbuf_size = size;
   80.14      if ( alloc_trace_bufs() != 0 )
   80.15 -    {
   80.16 -        opt_tbuf_size = 0;
   80.17          return -EINVAL;
   80.18 -    }
   80.19  
   80.20      printk("Xen trace buffers: initialized\n");
   80.21      return 0;
    81.1 --- a/xen/include/asm-x86/domain.h	Thu May 25 15:59:18 2006 -0600
    81.2 +++ b/xen/include/asm-x86/domain.h	Fri May 26 13:41:49 2006 -0600
    81.3 @@ -120,6 +120,18 @@ struct arch_vcpu
    81.4      struct vcpu_guest_context guest_context
    81.5      __attribute__((__aligned__(16)));
    81.6  
    81.7 +#ifdef CONFIG_X86_PAE
    81.8 +    /*
    81.9 +     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
   81.10 +     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
   81.11 +     * an L3 table while we are currently running on it (without using
   81.12 +     * expensive atomic 64-bit operations).
   81.13 +     */
   81.14 +    l3_pgentry_t  lowmem_l3tab[2][4] __attribute__((__aligned__(32)));
   81.15 +    unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */
   81.16 +    unsigned int  lowmem_l3tab_inuse;       /* Which lowmem_l3tab is in use? */
   81.17 +#endif
   81.18 +
   81.19      unsigned long      flags; /* TF_ */
   81.20  
   81.21      void (*schedule_tail) (struct vcpu *);
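The comment above explains why two fallback tables are needed: the table currently referenced by CR3 cannot be rewritten in place without 64-bit atomics, so updates go to the spare slot, which then becomes the active one. A standalone sketch of that flip, with a stand-in entry type (the real l3_pgentry_t and the surrounding MMU plumbing are omitted):

    typedef unsigned long l3_pgentry_t;   /* stand-in for the real type */

    struct pae_fallback {
        l3_pgentry_t  lowmem_l3tab[2][4];
        unsigned long lowmem_l3tab_high_mfn[2];
        unsigned int  lowmem_l3tab_inuse;
    };

    /* Populate the spare table, then flip to it: the running table is
     * never modified, so no 64-bit atomic stores are needed. */
    static unsigned int flip_lowmem_l3tab(struct pae_fallback *p,
                                          const l3_pgentry_t *new_l3,
                                          unsigned long high_mfn)
    {
        unsigned int spare = p->lowmem_l3tab_inuse ^ 1;

        for (unsigned int i = 0; i < 4; i++)
            p->lowmem_l3tab[spare][i] = new_l3[i];
        p->lowmem_l3tab_high_mfn[spare] = high_mfn;

        p->lowmem_l3tab_inuse = spare;   /* the new CR3 target */
        return spare;
    }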
    82.1 --- a/xen/include/asm-x86/hvm/domain.h	Thu May 25 15:59:18 2006 -0600
    82.2 +++ b/xen/include/asm-x86/hvm/domain.h	Fri May 26 13:41:49 2006 -0600
    82.3 @@ -35,9 +35,9 @@ struct hvm_domain {
    82.4      unsigned int           nr_vcpus;
    82.5      unsigned int           apic_enabled;
    82.6      unsigned int           pae_enabled;
    82.7 -
    82.8 -    struct hvm_virpit      vpit;
    82.9 -    u64                    guest_time;
   82.10 +    s64                    tsc_frequency;
   82.11 +    struct pl_time         pl_time;
   82.12 +    
   82.13      struct hvm_virpic      vpic;
   82.14      struct hvm_vioapic     vioapic;
   82.15      struct hvm_io_handler  io_handler;
    83.1 --- a/xen/include/asm-x86/hvm/svm/intr.h	Thu May 25 15:59:18 2006 -0600
    83.2 +++ b/xen/include/asm-x86/hvm/svm/intr.h	Fri May 26 13:41:49 2006 -0600
    83.3 @@ -21,7 +21,6 @@
    83.4  #ifndef __ASM_X86_HVM_SVM_INTR_H__
    83.5  #define __ASM_X86_HVM_SVM_INTR_H__
    83.6  
    83.7 -extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
    83.8  extern void svm_intr_assist(void);
    83.9  extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
   83.10  extern void svm_intr_assist_test_valid(struct vcpu *v, 
    84.1 --- a/xen/include/asm-x86/hvm/svm/svm.h	Thu May 25 15:59:18 2006 -0600
    84.2 +++ b/xen/include/asm-x86/hvm/svm/svm.h	Fri May 26 13:41:49 2006 -0600
    84.3 @@ -48,7 +48,6 @@ extern void svm_stts(struct vcpu *v);
    84.4  extern void svm_do_launch(struct vcpu *v);
    84.5  extern void svm_do_resume(struct vcpu *v);
    84.6  extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
    84.7 -extern u64 svm_get_guest_time(struct vcpu *v);
    84.8  extern void arch_svm_do_resume(struct vcpu *v);
    84.9  extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
   84.10  /* For debugging. Remove when no longer needed. */
    85.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Thu May 25 15:59:18 2006 -0600
    85.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Fri May 26 13:41:49 2006 -0600
    85.3 @@ -32,6 +32,9 @@ struct hvm_vcpu {
    85.4      unsigned long   ioflags;
    85.5      struct mmio_op  mmio_op;
    85.6      struct vlapic   *vlapic;
    85.7 +    s64             cache_tsc_offset;
    85.8 +    u64             guest_time;
    85.9 +
   85.10      /* For AP startup */
   85.11      unsigned long   init_sipi_sipi_state;
   85.12  
    86.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Thu May 25 15:59:18 2006 -0600
    86.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Fri May 26 13:41:49 2006 -0600
    86.3 @@ -34,7 +34,6 @@ extern void vmx_migrate_timers(struct vc
    86.4  extern void arch_vmx_do_launch(struct vcpu *);
    86.5  extern void arch_vmx_do_resume(struct vcpu *);
    86.6  extern void set_guest_time(struct vcpu *v, u64 gtime);
    86.7 -extern u64  get_guest_time(struct vcpu *v);
    86.8  
    86.9  extern unsigned int cpu_rev;
   86.10  
    87.1 --- a/xen/include/asm-x86/hvm/vpit.h	Thu May 25 15:59:18 2006 -0600
    87.2 +++ b/xen/include/asm-x86/hvm/vpit.h	Fri May 26 13:41:49 2006 -0600
    87.3 @@ -29,9 +29,7 @@
    87.4  #include <asm/hvm/vpic.h>
    87.5  
    87.6  #define PIT_FREQ 1193181
    87.7 -
    87.8 -#define PIT_BASE 0x40
    87.9 -#define HVM_PIT_ACCEL_MODE 2
   87.10 +#define PIT_BASE        0x40
   87.11  
   87.12  typedef struct PITChannelState {
   87.13      int count; /* can be 65536 */
   87.14 @@ -48,47 +46,56 @@ typedef struct PITChannelState {
   87.15      u8 gate; /* timer start */
   87.16      s64 count_load_time;
   87.17      /* irq handling */
   87.18 -    s64 next_transition_time;
   87.19 -    int irq;
   87.20 -    struct hvm_time_info *hvm_time;
   87.21 -    u32 period; /* period(ns) based on count */
   87.22 +    struct vcpu      *vcpu;
   87.23 +    struct periodic_time *pt;
   87.24  } PITChannelState;
   87.25 -
   87.26 -struct hvm_time_info {
   87.27 -    /* extra info for the mode 2 channel */
   87.28 -    struct timer pit_timer;
   87.29 -    struct vcpu *vcpu;          /* which vcpu the ac_timer bound to */
   87.30 -    u64 period_cycles;          /* pit frequency in cpu cycles */
   87.31 -    s_time_t count_advance;     /* accumulated count advance since last fire */
   87.32 -    s_time_t count_point;        /* last point accumulating count advance */
   87.33 -    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
   87.34 -    int first_injected;         /* flag to prevent shadow window */
   87.35 -    s64 cache_tsc_offset;       /* cache of VMCS TSC_OFFSET offset */
   87.36 -    u64 last_pit_gtime;         /* guest time when last pit is injected */
   87.37 +   
   87.38 +/*
   87.39 + * Abstraction layer for periodic time sources; also covers one-shot timers.
   87.40 + */
   87.41 +struct periodic_time {
   87.42 +    char enabled;               /* is this timer enabled? */
   87.43 +    char one_shot;              /* one-shot timer? */
   87.44 +    char irq;
   87.45 +    char first_injected;        /* flag to prevent shadow window */
   87.46 +    u32 pending_intr_nr;        /* counter of pending timer interrupts */
   87.47 +    u32 period;                 /* period in ns */
   87.48 +    u64 period_cycles;          /* period in cpu cycles */
   87.49 +    s_time_t scheduled;         /* scheduled timer interrupt */
   87.50 +    u64 last_plt_gtime;         /* platform time when last IRQ was injected */
   87.51 +    struct timer timer;         /* ac_timer */
   87.52  };
   87.53  
   87.54 -typedef struct hvm_virpit {
   87.55 +typedef struct PITState {
   87.56      PITChannelState channels[3];
   87.57 -    struct hvm_time_info time_info;
   87.58      int speaker_data_on;
   87.59      int dummy_refresh_clock;
   87.60 -}hvm_virpit;
   87.61 -
   87.62 +} PITState;
   87.63  
   87.64 -static __inline__ s_time_t get_pit_scheduled(
   87.65 -    struct vcpu *v,
   87.66 -    struct hvm_virpit *vpit)
   87.67 +struct pl_time {    /* platform time */
   87.68 +    struct periodic_time periodic_tm;
   87.69 +    struct PITState      vpit;
   87.70 +    /* TODO: RTC/ACPI time */
   87.71 +};
   87.72 +
   87.73 +static __inline__ s_time_t get_scheduled(
   87.74 +    struct vcpu *v, int irq,
   87.75 +    struct periodic_time *pt)
   87.76  {
   87.77 -    struct PITChannelState *s = &(vpit->channels[0]);
   87.78 -    if ( is_irq_enabled(v, 0) ) {
   87.79 -        return s->next_transition_time;
   87.80 +    if ( is_irq_enabled(v, irq) ) {
   87.81 +        return pt->scheduled;
   87.82      }
   87.83      else
   87.84          return -1;
   87.85  }
   87.86  
   87.87  /* to hook the ioreq packet to get the PIT initialization info */
   87.88 -extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
   87.89 -extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
   87.90 +extern void hvm_hooks_assist(struct vcpu *v);
   87.91 +extern void pickup_deactive_ticks(struct periodic_time *vpit);
   87.92 +extern u64 hvm_get_guest_time(struct vcpu *v);
   87.93 +extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, char irq, char one_shot);
   87.94 +extern void destroy_periodic_time(struct periodic_time *pt);
   87.95 +void pit_init(struct vcpu *v, unsigned long cpu_khz);
   87.96 +void pt_timer_fn(void *data);
   87.97  
   87.98  #endif /* __ASM_X86_HVM_VPIT_H__ */
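The new interface replaces the PIT-specific timer with a reusable periodic_time source created per interrupt line. A hedged usage sketch built from the declarations above, with illustrative values (kernel context assumed; error handling omitted):

    /* A 10ms periodic tick on virtual IRQ 0 for vcpu v (values illustrative). */
    struct periodic_time *pt =
        create_periodic_time(v, 10000000 /* period in ns */,
                             0 /* irq */, 0 /* one_shot: no, periodic */);

    /* ... guest runs; the timer callback accumulates pending_intr_nr ... */

    destroy_periodic_time(pt);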
    88.1 --- a/xen/include/asm-x86/string.h	Thu May 25 15:59:18 2006 -0600
    88.2 +++ b/xen/include/asm-x86/string.h	Fri May 26 13:41:49 2006 -0600
    88.3 @@ -3,152 +3,6 @@
    88.4  
    88.5  #include <xen/config.h>
    88.6  
    88.7 -#define __HAVE_ARCH_STRCPY
    88.8 -static inline char *strcpy(char *dest, const char *src)
    88.9 -{
   88.10 -    long d0, d1, d2;
   88.11 -    __asm__ __volatile__ (
   88.12 -        "1: lodsb          \n"
   88.13 -        "   stosb          \n"
   88.14 -        "   test %%al,%%al \n"
   88.15 -        "   jne  1b        \n"
   88.16 -        : "=&S" (d0), "=&D" (d1), "=&a" (d2)
   88.17 -        : "0" (src), "1" (dest) : "memory" );
   88.18 -    return dest;
   88.19 -}
   88.20 -
   88.21 -#define __HAVE_ARCH_STRNCPY
   88.22 -static inline char *strncpy(char *dest, const char *src, size_t count)
   88.23 -{
   88.24 -    long d0, d1, d2, d3;
   88.25 -    __asm__ __volatile__ (
   88.26 -        "1: dec  %2        \n"
   88.27 -        "   js   2f        \n"
   88.28 -        "   lodsb          \n"
   88.29 -        "   stosb          \n"
   88.30 -        "   test %%al,%%al \n"
   88.31 -        "   jne  1b        \n"
   88.32 -        "   rep ; stosb    \n"
   88.33 -        "2:                \n"
   88.34 -        : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
   88.35 -        : "0" (src), "1" (dest), "2" (count) : "memory" );
   88.36 -    return dest;
   88.37 -}
   88.38 -
   88.39 -#define __HAVE_ARCH_STRCAT
   88.40 -static inline char *strcat(char *dest, const char *src)
   88.41 -{
   88.42 -    long d0, d1, d2, d3;
   88.43 -    __asm__ __volatile__ (
   88.44 -        "   repne ; scasb  \n"
   88.45 -        "   dec  %1        \n"
   88.46 -        "1: lodsb          \n"
   88.47 -        "   stosb          \n"
   88.48 -        "   test %%al,%%al \n"
   88.49 -        "   jne  1b        \n"
   88.50 -        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
   88.51 -        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
   88.52 -    return dest;
   88.53 -}
   88.54 -
   88.55 -#define __HAVE_ARCH_STRNCAT
   88.56 -static inline char *strncat(char *dest, const char *src, size_t count)
   88.57 -{
   88.58 -    long d0, d1, d2, d3;
   88.59 -    __asm__ __volatile__ (
   88.60 -        "   repne ; scasb   \n"
   88.61 -        "   dec  %1         \n"
   88.62 -        "   mov  %8,%3      \n"
   88.63 -        "1: dec  %3         \n"
   88.64 -        "   js   2f         \n"
   88.65 -        "   lodsb           \n"
   88.66 -        "   stosb           \n"
   88.67 -        "   test %%al,%%al  \n"
   88.68 -        "   jne  1b         \n"
   88.69 -        "2: xor  %%eax,%%eax\n"
   88.70 -        "   stosb"
   88.71 -        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
   88.72 -        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
   88.73 -        : "memory" );
   88.74 -    return dest;
   88.75 -}
   88.76 -
   88.77 -#define __HAVE_ARCH_STRCMP
   88.78 -static inline int strcmp(const char *cs, const char *ct)
   88.79 -{
   88.80 -    long d0, d1;
   88.81 -    register int __res;
   88.82 -    __asm__ __volatile__ (
   88.83 -        "1: lodsb           \n"
   88.84 -        "   scasb           \n"
   88.85 -        "   jne  2f         \n"
   88.86 -        "   test %%al,%%al  \n"
   88.87 -        "   jne  1b         \n"
   88.88 -        "   xor  %%eax,%%eax\n"
   88.89 -        "   jmp  3f         \n"
   88.90 -        "2: sbb  %%eax,%%eax\n"
   88.91 -        "   or   $1,%%al    \n"
   88.92 -        "3:                 \n"
   88.93 -        : "=a" (__res), "=&S" (d0), "=&D" (d1)
   88.94 -        : "1" (cs), "2" (ct) );
   88.95 -    return __res;
   88.96 -}
   88.97 -
   88.98 -#define __HAVE_ARCH_STRNCMP
   88.99 -static inline int strncmp(const char *cs, const char *ct, size_t count)
  88.100 -{
  88.101 -    long d0, d1, d2;
  88.102 -    register int __res;
  88.103 -    __asm__ __volatile__ (
  88.104 -        "1: dec  %3         \n"
  88.105 -        "   js   2f         \n"
  88.106 -        "   lodsb           \n"
  88.107 -        "   scasb           \n"
  88.108 -        "   jne  3f         \n"
  88.109 -        "   test %%al,%%al  \n"
  88.110 -        "   jne  1b         \n"
  88.111 -        "2: xor  %%eax,%%eax\n"
  88.112 -        "   jmp  4f         \n"
  88.113 -        "3: sbb  %%eax,%%eax\n"
  88.114 -        "   or   $1,%%al    \n"
  88.115 -        "4:                 \n"
  88.116 -        : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
  88.117 -        : "1" (cs), "2" (ct), "3" (count) );
  88.118 -    return __res;
  88.119 -}
  88.120 -
  88.121 -#define __HAVE_ARCH_STRCHR
  88.122 -static inline char *strchr(const char *s, int c)
  88.123 -{
  88.124 -    long d0;
  88.125 -    register char *__res;
  88.126 -    __asm__ __volatile__ (
  88.127 -        "   mov  %%al,%%ah  \n"
  88.128 -        "1: lodsb           \n"
  88.129 -        "   cmp  %%ah,%%al  \n"
  88.130 -        "   je   2f         \n"
  88.131 -        "   test %%al,%%al  \n"
  88.132 -        "   jne  1b         \n"
  88.133 -        "   mov  $1,%1      \n"
  88.134 -        "2: mov  %1,%0      \n"
  88.135 -        "   dec  %0         \n"
  88.136 -        : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
  88.137 -    return __res;
  88.138 -}
  88.139 -
  88.140 -#define __HAVE_ARCH_STRLEN
  88.141 -static inline size_t strlen(const char *s)
  88.142 -{
  88.143 -    long d0;
  88.144 -    register int __res;
  88.145 -    __asm__ __volatile__ (
  88.146 -        "   repne ; scasb  \n"
  88.147 -        "   notl %0        \n"
  88.148 -        "   decl %0        \n"
  88.149 -        : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
  88.150 -    return __res;
  88.151 -}
  88.152 -
  88.153  static inline void *__variable_memcpy(void *to, const void *from, size_t n)
  88.154  {
  88.155      long d0, d1, d2;
  88.156 @@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
  88.157  #define __HAVE_ARCH_MEMCMP
  88.158  #define memcmp __builtin_memcmp
  88.159  
  88.160 -#define __HAVE_ARCH_MEMCHR
  88.161 -static inline void *memchr(const void *cs, int c, size_t count)
  88.162 -{
  88.163 -    long d0;
  88.164 -    register void *__res;
  88.165 -    if ( count == 0 )
  88.166 -        return NULL;
  88.167 -    __asm__ __volatile__ (
  88.168 -        "   repne ; scasb\n"
  88.169 -        "   je   1f      \n"
  88.170 -        "   mov  $1,%0   \n"
  88.171 -        "1: dec  %0      \n"
  88.172 -        : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
  88.173 -    return __res;
  88.174 -}
  88.175 -
  88.176  static inline void *__memset_generic(void *s, char c, size_t count)
  88.177  {
  88.178      long d0, d1;
    89.1 --- a/xen/include/asm-x86/x86_emulate.h	Thu May 25 15:59:18 2006 -0600
    89.2 +++ b/xen/include/asm-x86/x86_emulate.h	Fri May 26 13:41:49 2006 -0600
    89.3 @@ -9,8 +9,10 @@
    89.4  #ifndef __X86_EMULATE_H__
    89.5  #define __X86_EMULATE_H__
    89.6  
    89.7 +struct x86_emulate_ctxt;
    89.8 +
    89.9  /*
   89.10 - * x86_mem_emulator:
   89.11 + * x86_emulate_ops:
   89.12   * 
   89.13   * These operations represent the instruction emulator's interface to memory.
   89.14   * There are two categories of operation: those that act on ordinary memory
   89.15 @@ -47,7 +49,7 @@
   89.16  #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
   89.17  #define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
   89.18  #define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
   89.19 -struct x86_mem_emulator
   89.20 +struct x86_emulate_ops
   89.21  {
   89.22      /*
   89.23       * read_std: Read bytes of standard (non-emulated/special) memory.
   89.24 @@ -59,7 +61,8 @@ struct x86_mem_emulator
   89.25      int (*read_std)(
   89.26          unsigned long addr,
   89.27          unsigned long *val,
   89.28 -        unsigned int bytes);
   89.29 +        unsigned int bytes,
   89.30 +        struct x86_emulate_ctxt *ctxt);
   89.31  
   89.32      /*
   89.33       * write_std: Write bytes of standard (non-emulated/special) memory.
   89.34 @@ -71,7 +74,8 @@ struct x86_mem_emulator
   89.35      int (*write_std)(
   89.36          unsigned long addr,
   89.37          unsigned long val,
   89.38 -        unsigned int bytes);
   89.39 +        unsigned int bytes,
   89.40 +        struct x86_emulate_ctxt *ctxt);
   89.41  
   89.42      /*
   89.43       * read_emulated: Read bytes from emulated/special memory area.
   89.44 @@ -82,7 +86,8 @@ struct x86_mem_emulator
   89.45      int (*read_emulated)(
   89.46          unsigned long addr,
   89.47          unsigned long *val,
   89.48 -        unsigned int bytes);
   89.49 +        unsigned int bytes,
   89.50 +        struct x86_emulate_ctxt *ctxt);
   89.51  
   89.52      /*
    89.53       * write_emulated: Write bytes to emulated/special memory area.
   89.54 @@ -93,7 +98,8 @@ struct x86_mem_emulator
   89.55      int (*write_emulated)(
   89.56          unsigned long addr,
   89.57          unsigned long val,
   89.58 -        unsigned int bytes);
   89.59 +        unsigned int bytes,
   89.60 +        struct x86_emulate_ctxt *ctxt);
   89.61  
   89.62      /*
   89.63       * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
   89.64 @@ -107,11 +113,12 @@ struct x86_mem_emulator
   89.65          unsigned long addr,
   89.66          unsigned long old,
   89.67          unsigned long new,
   89.68 -        unsigned int bytes);
   89.69 +        unsigned int bytes,
   89.70 +        struct x86_emulate_ctxt *ctxt);
   89.71  
   89.72      /*
   89.73 -     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
   89.74 -     *                   emulated/special memory area.
   89.75 +     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
   89.76 +     *                     emulated/special memory area.
   89.77       *  @addr:  [IN ] Linear address to access.
   89.78       *  @old:   [IN ] Value expected to be current at @addr.
   89.79       *  @new:   [IN ] Value to write to @addr.
   89.80 @@ -126,7 +133,8 @@ struct x86_mem_emulator
   89.81          unsigned long old_lo,
   89.82          unsigned long old_hi,
   89.83          unsigned long new_lo,
   89.84 -        unsigned long new_hi);
   89.85 +        unsigned long new_hi,
   89.86 +        struct x86_emulate_ctxt *ctxt);
   89.87  };
   89.88  
   89.89  /* Standard reader/writer functions that callers may wish to use. */
   89.90 @@ -134,15 +142,29 @@ extern int
   89.91  x86_emulate_read_std(
   89.92      unsigned long addr,
   89.93      unsigned long *val,
   89.94 -    unsigned int bytes);
   89.95 +    unsigned int bytes,
   89.96 +    struct x86_emulate_ctxt *ctxt);
   89.97  extern int
   89.98  x86_emulate_write_std(
   89.99      unsigned long addr,
  89.100      unsigned long val,
  89.101 -    unsigned int bytes);
  89.102 +    unsigned int bytes,
  89.103 +    struct x86_emulate_ctxt *ctxt);
  89.104  
  89.105  struct cpu_user_regs;
  89.106  
  89.107 +struct x86_emulate_ctxt
  89.108 +{
  89.109 +    /* Register state before/after emulation. */
  89.110 +    struct cpu_user_regs   *regs;
  89.111 +
  89.112 +    /* Linear faulting address (if emulating a page-faulting instruction). */
  89.113 +    unsigned long           cr2;
  89.114 +
  89.115 +    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
  89.116 +    int                     mode;
  89.117 +};
  89.118 +
  89.119  /* Execution mode, passed to the emulator. */
  89.120  #define X86EMUL_MODE_REAL     0 /* Real mode.             */
  89.121  #define X86EMUL_MODE_PROT16   2 /* 16-bit protected mode. */
  89.122 @@ -159,25 +181,19 @@ struct cpu_user_regs;
  89.123  /*
  89.124   * x86_emulate_memop: Emulate an instruction that faulted attempting to
  89.125   *                    read/write a 'special' memory area.
  89.126 - *  @regs: Register state at time of fault.
  89.127 - *  @cr2:  Linear faulting address within an emulated/special memory area.
  89.128 - *  @ops:  Interface to access special memory.
  89.129 - *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
  89.130   * Returns -1 on failure, 0 on success.
  89.131   */
  89.132 -extern int
  89.133 +int
  89.134  x86_emulate_memop(
  89.135 -    struct cpu_user_regs *regs,
  89.136 -    unsigned long cr2,
  89.137 -    struct x86_mem_emulator *ops,
  89.138 -    int mode);
  89.139 +    struct x86_emulate_ctxt *ctxt,
  89.140 +    struct x86_emulate_ops  *ops);
  89.141  
  89.142  /*
  89.143   * Given the 'reg' portion of a ModRM byte, and a register block, return a
  89.144   * pointer into the block that addresses the relevant register.
  89.145   * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  89.146   */
  89.147 -extern void *
  89.148 +void *
  89.149  decode_register(
  89.150      uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
  89.151  
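The new x86_emulate_ctxt bundles what were previously three loose x86_emulate_memop arguments, and every ops callback now receives the same context. A hedged sketch of a caller; guest_regs, fault_address, and my_mmio_ops are hypothetical names for the caller's own state:

    struct x86_emulate_ctxt ctxt = {
        .regs = guest_regs,             /* register state at the fault     */
        .cr2  = fault_address,          /* linear address that faulted     */
        .mode = X86EMUL_MODE_PROT32,    /* guest in 32-bit protected mode  */
    };

    if (x86_emulate_memop(&ctxt, &my_mmio_ops) != 0)
        handle_emulation_failure();     /* hypothetical failure handling */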
    90.1 --- a/xen/include/public/io/xenbus.h	Thu May 25 15:59:18 2006 -0600
    90.2 +++ b/xen/include/public/io/xenbus.h	Fri May 26 13:41:49 2006 -0600
    90.3 @@ -9,34 +9,37 @@
    90.4  #ifndef _XEN_PUBLIC_IO_XENBUS_H
    90.5  #define _XEN_PUBLIC_IO_XENBUS_H
    90.6  
    90.7 -/* The state of either end of the Xenbus, i.e. the current communication
    90.8 -   status of initialisation across the bus.  States here imply nothing about
    90.9 -   the state of the connection between the driver and the kernel's device
   90.10 -   layers.  */
   90.11 -typedef enum
   90.12 -{
   90.13 -  XenbusStateUnknown      = 0,
   90.14 -  XenbusStateInitialising = 1,
   90.15 -  XenbusStateInitWait     = 2,  /* Finished early initialisation, but waiting
   90.16 -                                   for information from the peer or hotplug
   90.17 -				   scripts. */
   90.18 -  XenbusStateInitialised  = 3,  /* Initialised and waiting for a connection
   90.19 -				   from the peer. */
   90.20 -  XenbusStateConnected    = 4,
   90.21 -  XenbusStateClosing      = 5,  /* The device is being closed due to an error
   90.22 -				   or an unplug event. */
   90.23 -  XenbusStateClosed       = 6
   90.24 +/*
   90.25 + * The state of either end of the Xenbus, i.e. the current communication
   90.26 + * status of initialisation across the bus.  States here imply nothing about
   90.27 + * the state of the connection between the driver and the kernel's device
   90.28 + * layers.
   90.29 + */
   90.30 +enum xenbus_state {
   90.31 +    XenbusStateUnknown       = 0,
   90.32  
   90.33 -} XenbusState;
   90.34 +    XenbusStateInitialising  = 1,
   90.35 +
   90.36 +    /*
   90.37 +     * InitWait: Finished early initialisation but waiting for information
   90.38 +     * from the peer or hotplug scripts.
   90.39 +     */
   90.40 +    XenbusStateInitWait      = 2,
   90.41 +
   90.42 +    /*
   90.43 +     * Initialised: Waiting for a connection from the peer.
   90.44 +     */
   90.45 +    XenbusStateInitialised   = 3,
   90.46 +
   90.47 +    XenbusStateConnected     = 4,
   90.48 +
   90.49 +    /*
   90.50 +     * Closing: The device is being closed due to an error or an unplug event.
   90.51 +     */
   90.52 +    XenbusStateClosing       = 5,
   90.53 +
    90.54 +    XenbusStateClosed        = 6
   90.55 +};
   90.56 +typedef enum xenbus_state XenbusState;
   90.57  
   90.58  #endif /* _XEN_PUBLIC_IO_XENBUS_H */
   90.59 -
   90.60 -/*
   90.61 - * Local variables:
   90.62 - *  c-file-style: "linux"
   90.63 - *  indent-tabs-mode: t
   90.64 - *  c-indent-level: 8
   90.65 - *  c-basic-offset: 8
   90.66 - *  tab-width: 8
   90.67 - * End:
   90.68 - */
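Rewritten this way, the enum reads as the handshake it encodes: each end advances from Initialising toward Connected, and through Closing to Closed on error or unplug. A hedged sketch of a frontend reacting to the backend's state (all helper names below are hypothetical):

    /* Illustrative frontend reaction to the peer's state changes. */
    static void frontend_on_backend_state(enum xenbus_state backend)
    {
        switch (backend) {
        case XenbusStateInitWait:       /* backend awaits our details */
            publish_ring_refs();
            set_own_state(XenbusStateInitialised);
            break;
        case XenbusStateConnected:
            set_own_state(XenbusStateConnected);
            break;
        case XenbusStateClosing:
        case XenbusStateClosed:
            tear_down();
            set_own_state(XenbusStateClosed);
            break;
        default:
            break;
        }
    }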
    91.1 --- a/xen/include/public/sched_ctl.h	Thu May 25 15:59:18 2006 -0600
    91.2 +++ b/xen/include/public/sched_ctl.h	Fri May 26 13:41:49 2006 -0600
    91.3 @@ -10,6 +10,7 @@
    91.4  /* Scheduler types. */
    91.5  #define SCHED_BVT      0
    91.6  #define SCHED_SEDF     4
    91.7 +#define SCHED_CREDIT   5
    91.8  
    91.9  /* Set or get info? */
   91.10  #define SCHED_INFO_PUT 0
   91.11 @@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
   91.12              uint32_t extratime;
   91.13              uint32_t weight;
   91.14          } sedf;
   91.15 +        struct csched_domain {
   91.16 +            uint16_t weight;
   91.17 +            uint16_t cap;
   91.18 +        } credit;
   91.19      } u;
   91.20  };
   91.21  
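The new credit member carries the scheduler's two per-domain knobs: a relative weight, and a cap expressed as a percentage of one physical CPU (the accounting code above divides cap * CSCHED_ACCT_PERIOD by 100). A hedged sketch of filling the command, assuming the sched_id field used by the existing schedulers; the domain id and direction are set as usual and the values are illustrative:

    struct sched_adjdom_cmd cmd;

    memset(&cmd, 0, sizeof(cmd));
    cmd.sched_id        = SCHED_CREDIT;
    cmd.u.credit.weight = 512;      /* twice the share of a weight-256 domain */
    cmd.u.credit.cap    = 50;       /* at most 50% of one physical CPU        */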
    92.1 --- a/xen/include/xen/sched-if.h	Thu May 25 15:59:18 2006 -0600
    92.2 +++ b/xen/include/xen/sched-if.h	Fri May 26 13:41:49 2006 -0600
    92.3 @@ -58,6 +58,8 @@ struct scheduler {
    92.4      char *opt_name;         /* option name for this scheduler    */
    92.5      unsigned int sched_id;  /* ID for this scheduler             */
    92.6  
    92.7 +    void         (*init)           (void);
    92.8 +    void         (*tick)           (unsigned int cpu);
    92.9      int          (*alloc_task)     (struct vcpu *);
   92.10      void         (*add_task)       (struct vcpu *);
   92.11      void         (*free_task)      (struct domain *);
    93.1 --- a/xen/include/xen/softirq.h	Thu May 25 15:59:18 2006 -0600
    93.2 +++ b/xen/include/xen/softirq.h	Fri May 26 13:41:49 2006 -0600
    93.3 @@ -26,6 +26,19 @@ typedef void (*softirq_handler)(void);
    93.4  asmlinkage void do_softirq(void);
    93.5  extern void open_softirq(int nr, softirq_handler handler);
    93.6  
    93.7 +static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
    93.8 +{
    93.9 +    int cpu;
   93.10 +
   93.11 +    for_each_cpu_mask(cpu, mask)
   93.12 +    {
   93.13 +        if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
   93.14 +            cpu_clear(cpu, mask);
   93.15 +    }
   93.16 +
   93.17 +    smp_send_event_check_mask(mask);
   93.18 +}
   93.19 +
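cpumask_raise_softirq generalises cpu_raise_softirq below to a whole mask: CPUs that already had the softirq pending are pruned from the mask (they were already poked), and one batch of event-check IPIs goes to the rest. An illustrative call, in the spirit of a scheduler tickling all idle CPUs at once (SCHEDULE_SOFTIRQ and the idlers mask come from elsewhere in the tree):

    /* Kick every currently idle CPU into its scheduler soft interrupt.
     * The mask is passed by value, so the callee may prune it freely. */
    cpumask_raise_softirq(csched_priv.idlers, SCHEDULE_SOFTIRQ);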
   93.20  static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
   93.21  {
   93.22      if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )