direct-io.hg

changeset 8290:c9772105fead

Merged.
author emellor@leeni.uk.xensource.com
date Thu Dec 08 15:04:41 2005 +0000 (2005-12-08)
parents 76bff6c996b0 a08aef9f1c8e
children 6f62ad959f6b e64e28e36e02
files
line diff
     1.1 --- a/buildconfigs/Rules.mk	Thu Dec 08 15:04:31 2005 +0000
     1.2 +++ b/buildconfigs/Rules.mk	Thu Dec 08 15:04:41 2005 +0000
     1.3 @@ -21,6 +21,7 @@ endif
     1.4  
     1.5  # Expand Linux series to Linux version
     1.6  LINUX_SERIES	?= 2.6
     1.7 +LINUX_VER	?= $(shell grep "^LINUX_VER" buildconfigs/mk.linux-2.6-xen | sed -e 's/.*=[ ]*//')
     1.8  
     1.9  # Setup Linux search path
    1.10  LINUX_SRC_PATH	?= .:..
    1.11 @@ -109,6 +110,13 @@ endif
    1.12  %-config:
    1.13  	$(MAKE) -f buildconfigs/mk.$* config
    1.14  
    1.15 +linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref
    1.16 +	rm -rf tmp-$@
    1.17 +	cp -al $(<D) tmp-$@
    1.18 +	( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ )	
    1.19 +	diff -Nurp $(<D) tmp-$@ > $@ || true
    1.20 +	rm -rf tmp-$@
    1.21 +
    1.22  %-xen.patch: ref-%/.valid-ref
    1.23  	rm -rf tmp-$@
    1.24  	cp -al $(<D) tmp-$@
     2.1 --- a/extras/mini-os/events.c	Thu Dec 08 15:04:31 2005 +0000
     2.2 +++ b/extras/mini-os/events.c	Thu Dec 08 15:04:41 2005 +0000
     2.3 @@ -56,7 +56,7 @@ int do_event(u32 port, struct pt_regs *r
     2.4  
     2.5  }
     2.6  
     2.7 -void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
     2.8 +int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
     2.9  {
    2.10   	if(ev_actions[port].handler)
    2.11          printk("WARN: Handler for port %d already registered, replacing\n",
    2.12 @@ -67,6 +67,16 @@ void bind_evtchn( u32 port, void (*handl
    2.13   
    2.14  	/* Finally unmask the port */
    2.15  	unmask_evtchn(port);
    2.16 +
    2.17 +	return port;
    2.18 +}
    2.19 +
    2.20 +void unbind_evtchn( u32 port )
    2.21 +{
    2.22 +	if (!ev_actions[port].handler)
    2.23 +		printk("WARN: No handler for port %d when unbinding\n", port);
    2.24 +	ev_actions[port].handler = NULL;
    2.25 +	ev_actions[port].status |= EVS_DISABLED;
    2.26  }
    2.27  
    2.28  int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) )
    2.29 @@ -90,6 +100,10 @@ out:
    2.30  	return ret;
    2.31  }
    2.32  
    2.33 +void unbind_virq( u32 port )
    2.34 +{
    2.35 +	unbind_evtchn(port);
    2.36 +}
    2.37  
    2.38  
    2.39  /*
     3.1 --- a/extras/mini-os/include/events.h	Thu Dec 08 15:04:31 2005 +0000
     3.2 +++ b/extras/mini-os/include/events.h	Thu Dec 08 15:04:41 2005 +0000
     3.3 @@ -40,10 +40,12 @@ typedef struct _ev_action_t {
     3.4  /* prototypes */
     3.5  int do_event(u32 port, struct pt_regs *regs);
     3.6  int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) );
     3.7 -void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
     3.8 +int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
     3.9 +void unbind_evtchn( u32 port );
    3.10  void init_events(void);
    3.11 +void unbind_virq( u32 port );
    3.12  
    3.13 -static inline int notify_via_evtchn(int port)
    3.14 +static inline int notify_remote_via_evtchn(int port)
    3.15  {
    3.16      evtchn_op_t op;
    3.17      op.cmd = EVTCHNOP_send;
     4.1 --- a/extras/mini-os/include/os.h	Thu Dec 08 15:04:31 2005 +0000
     4.2 +++ b/extras/mini-os/include/os.h	Thu Dec 08 15:04:41 2005 +0000
     4.3 @@ -131,9 +131,11 @@ do {									\
     4.4  #if defined(__i386__)
     4.5  #define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
     4.6  #define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
     4.7 +#define wmb()	__asm__ __volatile__ ("": : :"memory")
     4.8  #elif defined(__x86_64__)
     4.9  #define mb()    __asm__ __volatile__ ("mfence":::"memory")
    4.10  #define rmb()   __asm__ __volatile__ ("lfence":::"memory")
    4.11 +#define wmb()	__asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */
    4.12  #endif
    4.13  
    4.14  
     5.1 --- a/extras/mini-os/include/semaphore.h	Thu Dec 08 15:04:31 2005 +0000
     5.2 +++ b/extras/mini-os/include/semaphore.h	Thu Dec 08 15:04:41 2005 +0000
     5.3 @@ -2,6 +2,7 @@
     5.4  #define _SEMAPHORE_H_
     5.5  
     5.6  #include <wait.h>
     5.7 +#include <spinlock.h>
     5.8  
     5.9  /*
    5.10   * Implementation of semaphore in Mini-os is simple, because 
    5.11 @@ -14,6 +15,15 @@ struct semaphore
    5.12  	struct wait_queue_head wait;
    5.13  };
    5.14  
    5.15 +/*
    5.16 + * the semaphore definition
    5.17 + */
    5.18 +struct rw_semaphore {
    5.19 +	signed long		count;
    5.20 +	spinlock_t		wait_lock;
    5.21 +	struct list_head	wait_list;
    5.22 +	int			debug;
    5.23 +};
    5.24  
    5.25  #define __SEMAPHORE_INITIALIZER(name, n)                            \
    5.26  {                                                                   \
    5.27 @@ -31,6 +41,12 @@ struct semaphore
    5.28  
    5.29  #define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
    5.30  
    5.31 +static inline void init_MUTEX(struct semaphore *sem)
    5.32 +{
    5.33 +  sem->count = 1;
    5.34 +  init_waitqueue_head(&sem->wait);
    5.35 +}
    5.36 +
    5.37  static void inline down(struct semaphore *sem)
    5.38  {
    5.39      wait_event(sem->wait, sem->count > 0);
    5.40 @@ -43,4 +59,27 @@ static void inline up(struct semaphore *
    5.41      wake_up(&sem->wait);
    5.42  }
    5.43  
     5.44 +/* FIXME! The read/write semaphores are unimplemented! */
    5.45 +static inline void init_rwsem(struct rw_semaphore *sem)
    5.46 +{
    5.47 +  sem->count = 1;
    5.48 +}
    5.49 +
    5.50 +static inline void down_read(struct rw_semaphore *sem)
    5.51 +{
    5.52 +}
    5.53 +
    5.54 +
    5.55 +static inline void up_read(struct rw_semaphore *sem)
    5.56 +{
    5.57 +}
    5.58 +
    5.59 +static inline void up_write(struct rw_semaphore *sem)
    5.60 +{
    5.61 +}
    5.62 +
    5.63 +static inline void down_write(struct rw_semaphore *sem)
    5.64 +{
    5.65 +}
    5.66 +
    5.67  #endif /* _SEMAPHORE_H */
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/extras/mini-os/include/spinlock.h	Thu Dec 08 15:04:41 2005 +0000
     6.3 @@ -0,0 +1,121 @@
     6.4 +#ifndef __ASM_SPINLOCK_H
     6.5 +#define __ASM_SPINLOCK_H
     6.6 +
     6.7 +#include <lib.h>
     6.8 +
     6.9 +/*
    6.10 + * Your basic SMP spinlocks, allowing only a single CPU anywhere
    6.11 + */
    6.12 +
    6.13 +typedef struct {
    6.14 +	volatile unsigned int slock;
    6.15 +} spinlock_t;
    6.16 +
    6.17 +#define SPINLOCK_MAGIC	0xdead4ead
    6.18 +
    6.19 +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 }
    6.20 +
    6.21 +#define spin_lock_init(x)	do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
    6.22 +
    6.23 +/*
    6.24 + * Simple spin lock operations.  There are two variants, one clears IRQ's
    6.25 + * on the local processor, one does not.
    6.26 + *
    6.27 + * We make no fairness assumptions. They have a cost.
    6.28 + */
    6.29 +
    6.30 +#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
    6.31 +#define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
    6.32 +
    6.33 +#define spin_lock_string \
    6.34 +        "1:\n" \
    6.35 +	LOCK \
    6.36 +	"decb %0\n\t" \
    6.37 +	"jns 3f\n" \
    6.38 +	"2:\t" \
    6.39 +	"rep;nop\n\t" \
    6.40 +	"cmpb $0,%0\n\t" \
    6.41 +	"jle 2b\n\t" \
    6.42 +	"jmp 1b\n" \
    6.43 +	"3:\n\t"
    6.44 +
    6.45 +#define spin_lock_string_flags \
    6.46 +        "1:\n" \
    6.47 +	LOCK \
    6.48 +	"decb %0\n\t" \
    6.49 +	"jns 4f\n\t" \
    6.50 +	"2:\t" \
    6.51 +	"testl $0x200, %1\n\t" \
    6.52 +	"jz 3f\n\t" \
    6.53 +	"#sti\n\t" \
    6.54 +	"3:\t" \
    6.55 +	"rep;nop\n\t" \
    6.56 +	"cmpb $0, %0\n\t" \
    6.57 +	"jle 3b\n\t" \
    6.58 +	"#cli\n\t" \
    6.59 +	"jmp 1b\n" \
    6.60 +	"4:\n\t"
    6.61 +
    6.62 +/*
    6.63 + * This works. Despite all the confusion.
    6.64 + * (except on PPro SMP or if we are using OOSTORE)
    6.65 + * (PPro errata 66, 92)
    6.66 + */
    6.67 +
    6.68 +#define spin_unlock_string \
    6.69 +	"xchgb %b0, %1" \
    6.70 +		:"=q" (oldval), "=m" (lock->slock) \
    6.71 +		:"0" (oldval) : "memory"
    6.72 +
    6.73 +static inline void _raw_spin_unlock(spinlock_t *lock)
    6.74 +{
    6.75 +	char oldval = 1;
    6.76 +	__asm__ __volatile__(
    6.77 +		spin_unlock_string
    6.78 +	);
    6.79 +}
    6.80 +
    6.81 +static inline int _raw_spin_trylock(spinlock_t *lock)
    6.82 +{
    6.83 +	char oldval;
    6.84 +	__asm__ __volatile__(
    6.85 +		"xchgb %b0,%1\n"
    6.86 +		:"=q" (oldval), "=m" (lock->slock)
    6.87 +		:"0" (0) : "memory");
    6.88 +	return oldval > 0;
    6.89 +}
    6.90 +
    6.91 +static inline void _raw_spin_lock(spinlock_t *lock)
    6.92 +{
    6.93 +	__asm__ __volatile__(
    6.94 +		spin_lock_string
    6.95 +		:"=m" (lock->slock) : : "memory");
    6.96 +}
    6.97 +
    6.98 +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
    6.99 +{
   6.100 +	__asm__ __volatile__(
   6.101 +		spin_lock_string_flags
   6.102 +		:"=m" (lock->slock) : "r" (flags) : "memory");
   6.103 +}
   6.104 +
   6.105 +#define _spin_trylock(lock)     ({_raw_spin_trylock(lock) ? \
   6.106 +                                1 : ({ 0;});})
   6.107 +
   6.108 +#define _spin_lock(lock)        \
   6.109 +do {                            \
   6.110 +        _raw_spin_lock(lock);   \
   6.111 +} while(0)
   6.112 +
   6.113 +#define _spin_unlock(lock)      \
   6.114 +do {                            \
   6.115 +        _raw_spin_unlock(lock); \
   6.116 +} while (0)
   6.117 +
   6.118 +
   6.119 +#define spin_lock(lock)       _spin_lock(lock)
   6.120 +#define spin_unlock(lock)       _spin_unlock(lock)
   6.121 +
   6.122 +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
   6.123 +
   6.124 +#endif
     7.1 --- a/extras/mini-os/include/wait.h	Thu Dec 08 15:04:31 2005 +0000
     7.2 +++ b/extras/mini-os/include/wait.h	Thu Dec 08 15:04:41 2005 +0000
     7.3 @@ -33,6 +33,10 @@ struct wait_queue name = {              
     7.4  }
     7.5  
     7.6  
     7.7 +static inline void init_waitqueue_head(struct wait_queue_head *h)
     7.8 +{
     7.9 +  INIT_LIST_HEAD(&h->thread_list);
    7.10 +}
    7.11  
    7.12  static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread)
    7.13  {
     8.1 --- a/extras/mini-os/include/xenbus.h	Thu Dec 08 15:04:31 2005 +0000
     8.2 +++ b/extras/mini-os/include/xenbus.h	Thu Dec 08 15:04:41 2005 +0000
     8.3 @@ -4,6 +4,7 @@
     8.4   * Talks to Xen Store to figure out what devices we have.
     8.5   *
     8.6   * Copyright (C) 2005 Rusty Russell, IBM Corporation
     8.7 + * Copyright (C) 2005 XenSource Ltd.
     8.8   * 
     8.9   * This file may be distributed separately from the Linux kernel, or
    8.10   * incorporated into other software packages, subject to the following license:
    8.11 @@ -30,45 +31,98 @@
    8.12  #ifndef _ASM_XEN_XENBUS_H
    8.13  #define _ASM_XEN_XENBUS_H
    8.14  
    8.15 -
    8.16 -/* Caller must hold this lock to call these functions: it's also held
    8.17 - * across watch callbacks. */
    8.18 -// TODO
    8.19 -//extern struct semaphore xenbus_lock;
    8.20 -
    8.21 -char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
    8.22 -void *xenbus_read(const char *dir, const char *node, unsigned int *len);
    8.23 -int xenbus_write(const char *dir, const char *node,
    8.24 -		 const char *string, int createflags);
    8.25 -int xenbus_mkdir(const char *dir, const char *node);
    8.26 -int xenbus_exists(const char *dir, const char *node);
    8.27 -int xenbus_rm(const char *dir, const char *node);
    8.28 -int xenbus_transaction_start(const char *subtree);
    8.29 -int xenbus_transaction_end(int abort);
    8.30 -
    8.31 -/* Single read and scanf: returns -errno or num scanned if > 0. */
    8.32 -int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
    8.33 -	__attribute__((format(scanf, 3, 4)));
    8.34 -
    8.35 -/* Single printf and write: returns -errno or 0. */
    8.36 -int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
    8.37 -	__attribute__((format(printf, 3, 4)));
    8.38 -
    8.39 -/* Generic read function: NULL-terminated triples of name,
    8.40 - * sprintf-style type string, and pointer. Returns 0 or errno.*/
    8.41 -int xenbus_gather(const char *dir, ...);
    8.42 +#include <errno.h>
    8.43 +#include <xen/io/xenbus.h>
    8.44 +#include <xen/io/xs_wire.h>
    8.45  
    8.46  /* Register callback to watch this node. */
    8.47  struct xenbus_watch
    8.48  {
    8.49  	struct list_head list;
    8.50 -	char *node;
    8.51 -	void (*callback)(struct xenbus_watch *, const char *node);
    8.52 +
    8.53 +	/* Path being watched. */
    8.54 +	const char *node;
    8.55 +
    8.56 +	/* Callback (executed in a process context with no locks held). */
    8.57 +	void (*callback)(struct xenbus_watch *,
    8.58 +			 const char **vec, unsigned int len);
    8.59 +};
    8.60 +
    8.61 +
    8.62 +/* A xenbus device. */
    8.63 +struct xenbus_device {
    8.64 +	const char *devicetype;
    8.65 +	const char *nodename;
    8.66 +	const char *otherend;
    8.67 +	int otherend_id;
    8.68 +	struct xenbus_watch otherend_watch;
    8.69 +	int has_error;
    8.70 +	void *data;
    8.71 +};
    8.72 +
    8.73 +struct xenbus_device_id
    8.74 +{
    8.75 +	/* .../device/<device_type>/<identifier> */
    8.76 +	char devicetype[32]; 	/* General class of device. */
    8.77  };
    8.78  
    8.79 +/* A xenbus driver. */
    8.80 +struct xenbus_driver {
    8.81 +	char *name;
    8.82 +	struct module *owner;
    8.83 +	const struct xenbus_device_id *ids;
    8.84 +	int (*probe)(struct xenbus_device *dev,
    8.85 +		     const struct xenbus_device_id *id);
    8.86 +	void (*otherend_changed)(struct xenbus_device *dev,
    8.87 +				 XenbusState backend_state);
    8.88 +	int (*remove)(struct xenbus_device *dev);
    8.89 +	int (*suspend)(struct xenbus_device *dev);
    8.90 +	int (*resume)(struct xenbus_device *dev);
    8.91 +	int (*hotplug)(struct xenbus_device *, char **, int, char *, int);
    8.92 +	int (*read_otherend_details)(struct xenbus_device *dev);
    8.93 +};
    8.94 +
    8.95 +int xenbus_register_frontend(struct xenbus_driver *drv);
    8.96 +int xenbus_register_backend(struct xenbus_driver *drv);
    8.97 +void xenbus_unregister_driver(struct xenbus_driver *drv);
    8.98 +
    8.99 +struct xenbus_transaction;
   8.100 +
   8.101 +char **xenbus_directory(struct xenbus_transaction *t,
   8.102 +			const char *dir, const char *node, unsigned int *num);
   8.103 +void *xenbus_read(struct xenbus_transaction *t,
   8.104 +		  const char *dir, const char *node, unsigned int *len);
   8.105 +int xenbus_write(struct xenbus_transaction *t,
   8.106 +		 const char *dir, const char *node, const char *string);
   8.107 +int xenbus_mkdir(struct xenbus_transaction *t,
   8.108 +		 const char *dir, const char *node);
   8.109 +int xenbus_exists(struct xenbus_transaction *t,
   8.110 +		  const char *dir, const char *node);
   8.111 +int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
   8.112 +struct xenbus_transaction *xenbus_transaction_start(void);
   8.113 +int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
   8.114 +
   8.115 +/* Single read and scanf: returns -errno or num scanned if > 0. */
   8.116 +int xenbus_scanf(struct xenbus_transaction *t,
   8.117 +		 const char *dir, const char *node, const char *fmt, ...)
   8.118 +	__attribute__((format(scanf, 4, 5)));
   8.119 +
   8.120 +/* Single printf and write: returns -errno or 0. */
   8.121 +int xenbus_printf(struct xenbus_transaction *t,
   8.122 +		  const char *dir, const char *node, const char *fmt, ...)
   8.123 +	__attribute__((format(printf, 4, 5)));
   8.124 +
   8.125 +/* Generic read function: NULL-terminated triples of name,
   8.126 + * sprintf-style type string, and pointer. Returns 0 or errno.*/
   8.127 +int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
   8.128 +
   8.129  int register_xenbus_watch(struct xenbus_watch *watch);
   8.130  void unregister_xenbus_watch(struct xenbus_watch *watch);
   8.131 -void reregister_xenbus_watches(void);
   8.132 +void xs_suspend(void);
   8.133 +void xs_resume(void);
   8.134 +
   8.135 +/* Used by xenbus_dev to borrow kernel's store connection. */
   8.136 +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
   8.137  
   8.138  /* Called from xen core code. */
   8.139  void xenbus_suspend(void);
   8.140 @@ -84,6 +138,87 @@ void xenbus_resume(void);
   8.141  
   8.142  #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
   8.143  
   8.144 -int xs_init(void);
   8.145 +
   8.146 +/**
   8.147 + * Register a watch on the given path, using the given xenbus_watch structure
   8.148 + * for storage, and the given callback function as the callback.  Return 0 on
   8.149 + * success, or -errno on error.  On success, the given path will be saved as
   8.150 + * watch->node, and remains the caller's to free.  On error, watch->node will
   8.151 + * be NULL, the device will switch to XenbusStateClosing, and the error will
   8.152 + * be saved in the store.
   8.153 + */
   8.154 +int xenbus_watch_path(struct xenbus_device *dev, const char *path,
   8.155 +		      struct xenbus_watch *watch, 
   8.156 +		      void (*callback)(struct xenbus_watch *,
   8.157 +				       const char **, unsigned int));
   8.158 +
   8.159 +
   8.160 +/**
   8.161 + * Register a watch on the given path/path2, using the given xenbus_watch
   8.162 + * structure for storage, and the given callback function as the callback.
   8.163 + * Return 0 on success, or -errno on error.  On success, the watched path
   8.164 + * (path/path2) will be saved as watch->node, and becomes the caller's to
   8.165 + * kfree().  On error, watch->node will be NULL, so the caller has nothing to
   8.166 + * free, the device will switch to XenbusStateClosing, and the error will be
   8.167 + * saved in the store.
   8.168 + */
   8.169 +int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
   8.170 +		       const char *path2, struct xenbus_watch *watch, 
   8.171 +		       void (*callback)(struct xenbus_watch *,
   8.172 +					const char **, unsigned int));
   8.173 +
   8.174 +
   8.175 +/**
   8.176 + * Advertise in the store a change of the given driver to the given new_state.
   8.177 + * Perform the change inside the given transaction xbt.  xbt may be NULL, in
   8.178 + * which case this is performed inside its own transaction.  Return 0 on
   8.179 + * success, or -errno on error.  On error, the device will switch to
   8.180 + * XenbusStateClosing, and the error will be saved in the store.
   8.181 + */
   8.182 +int xenbus_switch_state(struct xenbus_device *dev,
   8.183 +			struct xenbus_transaction *xbt,
   8.184 +			XenbusState new_state);
   8.185 +
   8.186 +
   8.187 +/**
   8.188 + * Grant access to the given ring_mfn to the peer of the given device.  Return
   8.189 + * 0 on success, or -errno on error.  On error, the device will switch to
   8.190 + * XenbusStateClosing, and the error will be saved in the store.
   8.191 + */
   8.192 +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
   8.193 +
   8.194 +
   8.195 +/**
   8.196 + * Allocate an event channel for the given xenbus_device, assigning the newly
   8.197 + * created local port to *port.  Return 0 on success, or -errno on error.  On
   8.198 + * error, the device will switch to XenbusStateClosing, and the error will be
   8.199 + * saved in the store.
   8.200 + */
   8.201 +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
   8.202 +
   8.203 +
   8.204 +/**
   8.205 + * Return the state of the driver rooted at the given store path, or
   8.206 + * XenbusStateClosed if no state can be read.
   8.207 + */
   8.208 +XenbusState xenbus_read_driver_state(const char *path);
   8.209 +
   8.210 +
   8.211 +/***
   8.212 + * Report the given negative errno into the store, along with the given
   8.213 + * formatted message.
   8.214 + */
   8.215 +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
   8.216 +		      ...);
   8.217 +
   8.218 +
   8.219 +/***
   8.220 + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
   8.221 + * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
   8.222 + * closedown of this driver and its peer.
   8.223 + */
   8.224 +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
   8.225 +		      ...);
   8.226 +
   8.227  
   8.228  #endif /* _ASM_XEN_XENBUS_H */
     9.1 --- a/extras/mini-os/include/xmalloc.h	Thu Dec 08 15:04:31 2005 +0000
     9.2 +++ b/extras/mini-os/include/xmalloc.h	Thu Dec 08 15:04:41 2005 +0000
     9.3 @@ -7,6 +7,9 @@
     9.4  /* Allocate space for array of typed objects. */
     9.5  #define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
     9.6  
     9.7 +#define malloc(size) _xmalloc(size, 4)
     9.8 +#define free(ptr) xfree(ptr)
     9.9 +
    9.10  /* Free any of the above. */
    9.11  extern void xfree(const void *);
    9.12  
    10.1 --- a/extras/mini-os/kernel.c	Thu Dec 08 15:04:31 2005 +0000
    10.2 +++ b/extras/mini-os/kernel.c	Thu Dec 08 15:04:41 2005 +0000
    10.3 @@ -35,6 +35,7 @@
    10.4  #include <lib.h>
    10.5  #include <sched.h>
    10.6  #include <xenbus.h>
    10.7 +#include "xenbus/xenbus_comms.h"
    10.8  
    10.9  /*
   10.10   * Shared page for communicating with the hypervisor.
    11.1 --- a/extras/mini-os/xenbus/xenbus_comms.c	Thu Dec 08 15:04:31 2005 +0000
    11.2 +++ b/extras/mini-os/xenbus/xenbus_comms.c	Thu Dec 08 15:04:41 2005 +0000
    11.3 @@ -33,199 +33,152 @@
    11.4  #include <events.h>
    11.5  #include <os.h>
    11.6  #include <lib.h>
    11.7 -
    11.8 -
    11.9 -#ifdef XENBUS_COMMS_DEBUG
   11.10 -#define DEBUG(_f, _a...) \
   11.11 -    printk("MINI_OS(file=xenbus_comms.c, line=%d) " _f "\n", __LINE__, ## _a)
   11.12 -#else
   11.13 -#define DEBUG(_f, _a...)    ((void)0)
   11.14 -#endif
   11.15 +#include <xenbus.h>
   11.16 +#include "xenbus_comms.h"
   11.17  
   11.18 +static int xenbus_irq;
   11.19  
   11.20 -#define RINGBUF_DATASIZE ((PAGE_SIZE / 2) - sizeof(struct ringbuf_head))
   11.21 -struct ringbuf_head
   11.22 -{
   11.23 -	u32 write; /* Next place to write to */
   11.24 -	u32 read; /* Next place to read from */
   11.25 -	u8 flags;
   11.26 -	char buf[0];
   11.27 -} __attribute__((packed));
   11.28 +extern void xenbus_probe(void *);
   11.29 +extern int xenstored_ready;
   11.30  
   11.31  DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
   11.32  
   11.33 -static inline struct ringbuf_head *outbuf(void)
   11.34 +static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
   11.35  {
   11.36  	return mfn_to_virt(start_info.store_mfn);
   11.37  }
   11.38  
   11.39 -static inline struct ringbuf_head *inbuf(void)
   11.40 -{
   11.41 -	return (struct ringbuf_head *)((char *)mfn_to_virt(start_info.store_mfn) + PAGE_SIZE/2);
   11.42 -}
   11.43 -
   11.44  static void wake_waiting(int port, struct pt_regs *regs)
   11.45  {
   11.46  	wake_up(&xb_waitq);
   11.47  }
   11.48  
   11.49 -static int check_buffer(const struct ringbuf_head *h)
   11.50 -{
   11.51 -	return (h->write < RINGBUF_DATASIZE && h->read < RINGBUF_DATASIZE);
   11.52 -}
   11.53 -
   11.54 -/* We can't fill last byte: would look like empty buffer. */
   11.55 -static void *get_output_chunk(const struct ringbuf_head *h,
   11.56 -			      void *buf, u32 *len)
   11.57 +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
   11.58  {
   11.59 -	u32 read_mark;
   11.60 -
   11.61 -	if (h->read == 0)
   11.62 -		read_mark = RINGBUF_DATASIZE - 1;
   11.63 -	else
   11.64 -		read_mark = h->read - 1;
   11.65 -
   11.66 -	/* Here to the end of buffer, unless they haven't read some out. */
   11.67 -	*len = RINGBUF_DATASIZE - h->write;
   11.68 -	if (read_mark >= h->write)
   11.69 -		*len = read_mark - h->write;
   11.70 -	return (void *)((char *)buf + h->write);
   11.71 +	return ((prod - cons) <= XENSTORE_RING_SIZE);
   11.72  }
   11.73  
   11.74 -static const void *get_input_chunk(const struct ringbuf_head *h,
   11.75 -				   const void *buf, u32 *len)
   11.76 +static void *get_output_chunk(XENSTORE_RING_IDX cons,
   11.77 +			      XENSTORE_RING_IDX prod,
   11.78 +			      char *buf, uint32_t *len)
   11.79  {
   11.80 -	/* Here to the end of buffer, unless they haven't written some. */
   11.81 -	*len = RINGBUF_DATASIZE - h->read;
   11.82 -	if (h->write >= h->read)
   11.83 -		*len = h->write - h->read;
   11.84 -	return (void *)((char *)buf + h->read);
   11.85 +	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
   11.86 +	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
   11.87 +		*len = XENSTORE_RING_SIZE - (prod - cons);
   11.88 +	return buf + MASK_XENSTORE_IDX(prod);
   11.89  }
   11.90  
   11.91 -static void update_output_chunk(struct ringbuf_head *h, u32 len)
   11.92 -{
   11.93 -	h->write += len;
   11.94 -	if (h->write == RINGBUF_DATASIZE)
   11.95 -		h->write = 0;
   11.96 -}
   11.97 -
   11.98 -static void update_input_chunk(struct ringbuf_head *h, u32 len)
   11.99 +static const void *get_input_chunk(XENSTORE_RING_IDX cons,
  11.100 +				   XENSTORE_RING_IDX prod,
  11.101 +				   const char *buf, uint32_t *len)
  11.102  {
  11.103 -	h->read += len;
  11.104 -	if (h->read == RINGBUF_DATASIZE)
  11.105 -		h->read = 0;
  11.106 -}
  11.107 -
  11.108 -static int output_avail(struct ringbuf_head *out)
  11.109 -{
  11.110 -	unsigned int avail;
  11.111 -
  11.112 -	get_output_chunk(out, out->buf, &avail);
  11.113 -	return avail != 0;
  11.114 +	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
  11.115 +	if ((prod - cons) < *len)
  11.116 +		*len = prod - cons;
  11.117 +	return buf + MASK_XENSTORE_IDX(cons);
  11.118  }
  11.119  
  11.120  int xb_write(const void *data, unsigned len)
  11.121  {
  11.122 -	struct ringbuf_head h;
  11.123 -	struct ringbuf_head *out = outbuf();
  11.124 +	struct xenstore_domain_interface *intf = xenstore_domain_interface();
  11.125 +	XENSTORE_RING_IDX cons, prod;
  11.126  
  11.127 -	do {
  11.128 +	while (len != 0) {
  11.129  		void *dst;
  11.130  		unsigned int avail;
  11.131  
  11.132 -		wait_event(xb_waitq, output_avail(out));
  11.133 +		wait_event(xb_waitq, (intf->req_prod - intf->req_cons) !=
  11.134 +			   XENSTORE_RING_SIZE);
  11.135  
  11.136 -		/* Read, then check: not that we don't trust store.
  11.137 -		 * Hell, some of my best friends are daemons.  But,
  11.138 -		 * in this post-911 world... */
  11.139 -		h = *out;
  11.140 +		/* Read indexes, then verify. */
  11.141 +		cons = intf->req_cons;
  11.142 +		prod = intf->req_prod;
  11.143  		mb();
  11.144 -		if (!check_buffer(&h)) {
  11.145 -			return -1; /* ETERRORIST! */
  11.146 -		}
  11.147 +		if (!check_indexes(cons, prod))
  11.148 +			return -EIO;
  11.149  
  11.150 -		dst = get_output_chunk(&h, out->buf, &avail);
  11.151 +		dst = get_output_chunk(cons, prod, intf->req, &avail);
  11.152 +		if (avail == 0)
  11.153 +			continue;
  11.154  		if (avail > len)
  11.155  			avail = len;
  11.156 +
  11.157  		memcpy(dst, data, avail);
  11.158 -		data = (void *)((char *)data + avail);
  11.159 +		data = (void*) ( (unsigned long)data + avail );
  11.160  		len -= avail;
  11.161 -		update_output_chunk(out, avail);
  11.162 -		notify_via_evtchn(start_info.store_evtchn);
  11.163 -	} while (len != 0);
  11.164 +
  11.165 +		/* Other side must not see new header until data is there. */
  11.166 +		wmb();
  11.167 +		intf->req_prod += avail;
  11.168 +
  11.169 +		/* This implies mb() before other side sees interrupt. */
  11.170 +		notify_remote_via_evtchn(start_info.store_evtchn);
  11.171 +	}
  11.172  
  11.173  	return 0;
  11.174  }
  11.175  
  11.176 -int xs_input_avail(void)
  11.177 -{
  11.178 -	unsigned int avail;
  11.179 -	struct ringbuf_head *in = inbuf();
  11.180 -
  11.181 -	get_input_chunk(in, in->buf, &avail);
  11.182 -	return avail != 0;
  11.183 -}
  11.184 -
  11.185  int xb_read(void *data, unsigned len)
  11.186  {
  11.187 -	struct ringbuf_head h;
  11.188 -	struct ringbuf_head *in = inbuf();
  11.189 -	int was_full;
  11.190 +	struct xenstore_domain_interface *intf = xenstore_domain_interface();
  11.191 +	XENSTORE_RING_IDX cons, prod;
  11.192  
  11.193  	while (len != 0) {
  11.194  		unsigned int avail;
  11.195  		const char *src;
  11.196  
  11.197 -		wait_event(xb_waitq, xs_input_avail());
  11.198 -		h = *in;
  11.199 +		wait_event(xb_waitq,
  11.200 +			   intf->rsp_cons != intf->rsp_prod);
  11.201 +
  11.202 +		/* Read indexes, then verify. */
  11.203 +		cons = intf->rsp_cons;
  11.204 +		prod = intf->rsp_prod;
  11.205  		mb();
  11.206 -		if (!check_buffer(&h)) {
  11.207 -			return -1;
  11.208 -		}
  11.209 +		if (!check_indexes(cons, prod))
  11.210 +			return -EIO;
  11.211  
  11.212 -		src = get_input_chunk(&h, in->buf, &avail);
  11.213 +		src = get_input_chunk(cons, prod, intf->rsp, &avail);
  11.214 +		if (avail == 0)
  11.215 +			continue;
  11.216  		if (avail > len)
  11.217  			avail = len;
  11.218 -		was_full = !output_avail(&h);
  11.219 +
  11.220 +		/* We must read header before we read data. */
  11.221 +		rmb();
  11.222  
  11.223  		memcpy(data, src, avail);
  11.224 -		data = (void *)((char *)data + avail);
  11.225 +		data = (void*) ( (unsigned long)data + avail );
  11.226  		len -= avail;
  11.227 -		update_input_chunk(in, avail);
  11.228 -		DEBUG("Finished read of %i bytes (%i to go)\n", avail, len);
  11.229 -		/* If it was full, tell them we've taken some. */
  11.230 -		if (was_full)
  11.231 -			notify_via_evtchn(start_info.store_evtchn);
  11.232 +
  11.233 +		/* Other side must not see free space until we've copied out */
  11.234 +		mb();
  11.235 +		intf->rsp_cons += avail;
  11.236 +
  11.237 +		printk("Finished read of %i bytes (%i to go)\n", avail, len);
  11.238 +
  11.239 +		/* Implies mb(): they will see new header. */
  11.240 +		notify_remote_via_evtchn(start_info.store_evtchn);
  11.241  	}
  11.242  
  11.243 -	/* If we left something, wake watch thread to deal with it. */
  11.244 -	if (xs_input_avail())
  11.245 -		wake_up(&xb_waitq);
  11.246 -
  11.247  	return 0;
  11.248  }
  11.249  
  11.250  /* Set up interrupt handler off store event channel. */
  11.251  int xb_init_comms(void)
  11.252  {
  11.253 -    printk("Init xenbus comms, store event channel %d\n", start_info.store_evtchn);
  11.254 -	if (!start_info.store_evtchn)
  11.255 -		return 0;
  11.256 -    printk("Binding virq\n");
  11.257 -	bind_evtchn(start_info.store_evtchn, &wake_waiting);
  11.258 +	int err;
  11.259 +
  11.260 +	if (xenbus_irq)
  11.261 +		unbind_evtchn(xenbus_irq);
  11.262  
  11.263 -	/* FIXME zero out page -- domain builder should probably do this*/
  11.264 -	memset(mfn_to_virt(start_info.store_mfn), 0, PAGE_SIZE);
  11.265 -    notify_via_evtchn(start_info.store_evtchn);
  11.266 +	err = bind_evtchn(
  11.267 +		start_info.store_evtchn, wake_waiting);
  11.268 +	if (err <= 0) {
  11.269 +		printk("XENBUS request irq failed %i\n", err);
  11.270 +		return err;
  11.271 +	}
  11.272 +
  11.273 +	xenbus_irq = err;
  11.274 +
  11.275  	return 0;
  11.276  }
  11.277 -
  11.278 -void xb_suspend_comms(void)
  11.279 -{
  11.280 -
  11.281 -	if (!start_info.store_evtchn)
  11.282 -		return;
  11.283 -
  11.284 -    // TODO
  11.285 -	//unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq);
  11.286 -}
    12.1 --- a/extras/mini-os/xenbus/xenbus_comms.h	Thu Dec 08 15:04:31 2005 +0000
    12.2 +++ b/extras/mini-os/xenbus/xenbus_comms.h	Thu Dec 08 15:04:41 2005 +0000
    12.3 @@ -28,8 +28,8 @@
    12.4  #ifndef _XENBUS_COMMS_H
    12.5  #define _XENBUS_COMMS_H
    12.6  
    12.7 +int xs_init(void);
    12.8  int xb_init_comms(void);
    12.9 -void xb_suspend_comms(void);
   12.10  
   12.11  /* Low level routines. */
   12.12  int xb_write(const void *data, unsigned len);
    13.1 --- a/extras/mini-os/xenbus/xenbus_xs.c	Thu Dec 08 15:04:31 2005 +0000
    13.2 +++ b/extras/mini-os/xenbus/xenbus_xs.c	Thu Dec 08 15:04:41 2005 +0000
    13.3 @@ -39,15 +39,63 @@
    13.4  #include <wait.h>
    13.5  #include <sched.h>
    13.6  #include <semaphore.h>
    13.7 +#include <spinlock.h>
    13.8  #include <xen/io/xs_wire.h>
    13.9  #include "xenbus_comms.h"
   13.10  
   13.11  #define streq(a, b) (strcmp((a), (b)) == 0)
   13.12  
   13.13 -static char printf_buffer[4096];
   13.14 +struct xs_stored_msg {
   13.15 +	struct list_head list;
   13.16 +
   13.17 +	struct xsd_sockmsg hdr;
   13.18 +
   13.19 +	union {
   13.20 +		/* Queued replies. */
   13.21 +		struct {
   13.22 +			char *body;
   13.23 +		} reply;
   13.24 +
   13.25 +		/* Queued watch events. */
   13.26 +		struct {
   13.27 +			struct xenbus_watch *handle;
   13.28 +			char **vec;
   13.29 +			unsigned int vec_size;
   13.30 +		} watch;
   13.31 +	} u;
   13.32 +};
   13.33 +
   13.34 +struct xs_handle {
   13.35 +	/* A list of replies. Currently only one will ever be outstanding. */
   13.36 +	struct list_head reply_list;
   13.37 +	spinlock_t reply_lock;
   13.38 +	struct wait_queue_head reply_waitq;
   13.39 +
   13.40 +	/* One request at a time. */
   13.41 +	struct semaphore request_mutex;
   13.42 +
   13.43 +	/* Protect transactions against save/restore. */
   13.44 +	struct rw_semaphore suspend_mutex;
   13.45 +};
   13.46 +
   13.47 +static struct xs_handle xs_state;
   13.48 +
   13.49 +/* List of registered watches, and a lock to protect it. */
   13.50  static LIST_HEAD(watches);
   13.51 -//TODO
   13.52 -DECLARE_MUTEX(xenbus_lock);
   13.53 +static DEFINE_SPINLOCK(watches_lock);
   13.54 +
   13.55 +/* List of pending watch callback events, and a lock to protect it. */
   13.56 +static LIST_HEAD(watch_events);
   13.57 +static DEFINE_SPINLOCK(watch_events_lock);
   13.58 +
   13.59 +/*
   13.60 + * Details of the xenwatch callback kernel thread. The thread waits on the
   13.61 + * watch_events_waitq for work to do (queued on watch_events list). When it
   13.62 + * wakes up it acquires the xenwatch_mutex before reading the list and
   13.63 + * carrying out work.
   13.64 + */
   13.65 +/* static */ DECLARE_MUTEX(xenwatch_mutex);
   13.66 +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
   13.67  
   13.68  static int get_error(const char *errorstring)
   13.69  {
   13.70 @@ -65,47 +113,82 @@ static int get_error(const char *errorst
   13.71  
   13.72  static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
   13.73  {
   13.74 -	struct xsd_sockmsg msg;
   13.75 -	void *ret;
   13.76 -	int err;
   13.77 +	struct xs_stored_msg *msg;
   13.78 +	char *body;
   13.79 +
   13.80 +	spin_lock(&xs_state.reply_lock);
   13.81  
   13.82 -	err = xb_read(&msg, sizeof(msg));
   13.83 -	if (err)
   13.84 -		return ERR_PTR(err);
   13.85 -
   13.86 -	ret = xmalloc_array(char, msg.len + 1);
   13.87 -	if (!ret)
   13.88 -		return ERR_PTR(-ENOMEM);
   13.89 +	while (list_empty(&xs_state.reply_list)) {
   13.90 +		spin_unlock(&xs_state.reply_lock);
   13.91 +		wait_event(xs_state.reply_waitq,
   13.92 +			   !list_empty(&xs_state.reply_list));
   13.93 +		spin_lock(&xs_state.reply_lock);
   13.94 +	}
   13.95  
   13.96 -	err = xb_read(ret, msg.len);
   13.97 -	if (err) {
   13.98 -		xfree(ret);
   13.99 -		return ERR_PTR(err);
  13.100 -	}
  13.101 -	((char*)ret)[msg.len] = '\0';
  13.102 +	msg = list_entry(xs_state.reply_list.next,
  13.103 +			 struct xs_stored_msg, list);
  13.104 +	list_del(&msg->list);
  13.105 +
  13.106 +	spin_unlock(&xs_state.reply_lock);
  13.107  
  13.108 -	*type = msg.type;
  13.109 +	*type = msg->hdr.type;
  13.110  	if (len)
  13.111 -		*len = msg.len;
  13.112 -	return ret;
  13.113 +		*len = msg->hdr.len;
  13.114 +	body = msg->u.reply.body;
  13.115 +
  13.116 +	free(msg);
  13.117 +
  13.118 +	return body;
  13.119  }
  13.120  
  13.121  /* Emergency write. */
  13.122  void xenbus_debug_write(const char *str, unsigned int count)
  13.123  {
  13.124 -	struct xsd_sockmsg msg;
  13.125 +	struct xsd_sockmsg msg = { 0 };
  13.126  
  13.127  	msg.type = XS_DEBUG;
  13.128  	msg.len = sizeof("print") + count + 1;
  13.129  
  13.130 +	down(&xs_state.request_mutex);
  13.131  	xb_write(&msg, sizeof(msg));
  13.132  	xb_write("print", sizeof("print"));
  13.133  	xb_write(str, count);
  13.134  	xb_write("", 1);
  13.135 +	up(&xs_state.request_mutex);
  13.136 +}
  13.137 +
  13.138 +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
  13.139 +{
  13.140 +	void *ret;
  13.141 +	struct xsd_sockmsg req_msg = *msg;
  13.142 +	int err;
  13.143 +
  13.144 +	if (req_msg.type == XS_TRANSACTION_START)
  13.145 +		down_read(&xs_state.suspend_mutex);
  13.146 +
  13.147 +	down(&xs_state.request_mutex);
  13.148 +
  13.149 +	err = xb_write(msg, sizeof(*msg) + msg->len);
  13.150 +	if (err) {
  13.151 +		msg->type = XS_ERROR;
  13.152 +		ret = ERR_PTR(err);
  13.153 +	} else {
  13.154 +		ret = read_reply(&msg->type, &msg->len);
  13.155 +	}
  13.156 +
  13.157 +	up(&xs_state.request_mutex);
  13.158 +
  13.159 +	if ((msg->type == XS_TRANSACTION_END) ||
  13.160 +	    ((req_msg.type == XS_TRANSACTION_START) &&
  13.161 +	     (msg->type == XS_ERROR)))
  13.162 +		up_read(&xs_state.suspend_mutex);
  13.163 +
  13.164 +	return ret;
  13.165  }
  13.166  
  13.167  /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
  13.168 -static void *xs_talkv(enum xsd_sockmsg_type type,
  13.169 +static void *xs_talkv(struct xenbus_transaction *t,
  13.170 +		      enum xsd_sockmsg_type type,
  13.171  		      const struct kvec *iovec,
  13.172  		      unsigned int num_vecs,
  13.173  		      unsigned int *len)
  13.174 @@ -115,51 +198,57 @@ static void *xs_talkv(enum xsd_sockmsg_t
  13.175  	unsigned int i;
  13.176  	int err;
  13.177  
  13.178 -	//WARN_ON(down_trylock(&xenbus_lock) == 0);
  13.179 -
  13.180 +	msg.tx_id = (u32)(unsigned long)t;
  13.181 +	msg.req_id = 0;
  13.182  	msg.type = type;
  13.183  	msg.len = 0;
  13.184  	for (i = 0; i < num_vecs; i++)
  13.185  		msg.len += iovec[i].iov_len;
  13.186  
  13.187 +	down(&xs_state.request_mutex);
  13.188 +
  13.189  	err = xb_write(&msg, sizeof(msg));
  13.190 -	if (err)
  13.191 +	if (err) {
  13.192 +		up(&xs_state.request_mutex);
  13.193  		return ERR_PTR(err);
  13.194 +	}
  13.195  
  13.196  	for (i = 0; i < num_vecs; i++) {
  13.197 -		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
  13.198 -		if (err)
  13.199 +		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
  13.200 +		if (err) {
  13.201 +			up(&xs_state.request_mutex);
  13.202  			return ERR_PTR(err);
  13.203 +		}
  13.204  	}
  13.205  
  13.206 -	/* Watches can have fired before reply comes: daemon detects
  13.207 -	 * and re-transmits, so we can ignore this. */
  13.208 -	do {
  13.209 -		xfree(ret);
  13.210 -		ret = read_reply(&msg.type, len);
  13.211 -		if (IS_ERR(ret))
  13.212 -			return ret;
  13.213 -	} while (msg.type == XS_WATCH_EVENT);
  13.214 +	ret = read_reply(&msg.type, len);
  13.215 +
  13.216 +	up(&xs_state.request_mutex);
  13.217 +
  13.218 +	if (IS_ERR(ret))
  13.219 +		return ret;
  13.220  
  13.221  	if (msg.type == XS_ERROR) {
  13.222  		err = get_error(ret);
  13.223 -		xfree(ret);
  13.224 +		free(ret);
  13.225  		return ERR_PTR(-err);
  13.226  	}
  13.227  
  13.228 -	//BUG_ON(msg.type != type);
  13.229 +	//	BUG_ON(msg.type != type);
  13.230  	return ret;
  13.231  }
  13.232  
  13.233  /* Simplified version of xs_talkv: single message. */
  13.234 -static void *xs_single(enum xsd_sockmsg_type type,
  13.235 -		       const char *string, unsigned int *len)
  13.236 +static void *xs_single(struct xenbus_transaction *t,
  13.237 +		       enum xsd_sockmsg_type type,
  13.238 +		       const char *string,
  13.239 +		       unsigned int *len)
  13.240  {
  13.241  	struct kvec iovec;
  13.242  
  13.243  	iovec.iov_base = (void *)string;
  13.244  	iovec.iov_len = strlen(string) + 1;
  13.245 -	return xs_talkv(type, &iovec, 1, len);
  13.246 +	return xs_talkv(t, type, &iovec, 1, len);
  13.247  }
  13.248  
  13.249  /* Many commands only need an ack, don't care what it says. */
  13.250 @@ -167,7 +256,7 @@ static int xs_error(char *reply)
  13.251  {
  13.252  	if (IS_ERR(reply))
  13.253  		return PTR_ERR(reply);
  13.254 -	xfree(reply);
  13.255 +	free(reply);
  13.256  	return 0;
  13.257  }
  13.258  
  13.259 @@ -182,60 +271,76 @@ static unsigned int count_strings(const 
  13.260  	return num;
  13.261  }
  13.262  
  13.263 -/* Return the path to dir with /name appended. */ 
  13.264 +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ 
  13.265  static char *join(const char *dir, const char *name)
  13.266  {
  13.267 -	static char buffer[4096];
  13.268 +	char *buffer;
  13.269  
  13.270 -	//BUG_ON(down_trylock(&xenbus_lock) == 0);
  13.271 -	/* XXX FIXME: might not be correct if name == "" */
  13.272 -	//BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
  13.273 +	buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1);
  13.274 +	if (buffer == NULL)
  13.275 +		return ERR_PTR(-ENOMEM);
  13.276  
  13.277  	strcpy(buffer, dir);
  13.278  	if (!streq(name, "")) {
  13.279  		strcat(buffer, "/");
  13.280  		strcat(buffer, name);
  13.281  	}
  13.282 +
  13.283  	return buffer;
  13.284  }
  13.285  
  13.286 -char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
  13.287 +static char **split(char *strings, unsigned int len, unsigned int *num)
  13.288  {
  13.289 -	char *strings, *p, **ret;
  13.290 -	unsigned int len;
  13.291 -
  13.292 -	strings = xs_single(XS_DIRECTORY, join(dir, node), &len);
  13.293 -	if (IS_ERR(strings))
  13.294 -		return (char **)strings;
  13.295 +	char *p, **ret;
  13.296  
  13.297  	/* Count the strings. */
  13.298  	*num = count_strings(strings, len);
  13.299  
  13.300  	/* Transfer to one big alloc for easy freeing. */
  13.301 -	ret = (char **)xmalloc_array(char, *num * sizeof(char *) + len);
  13.302 +	ret = malloc(*num * sizeof(char *) + len);
  13.303  	if (!ret) {
  13.304 -		xfree(strings);
  13.305 +		free(strings);
  13.306  		return ERR_PTR(-ENOMEM);
  13.307  	}
  13.308  	memcpy(&ret[*num], strings, len);
  13.309 -	xfree(strings);
  13.310 +	free(strings);
  13.311  
  13.312  	strings = (char *)&ret[*num];
  13.313  	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
  13.314  		ret[(*num)++] = p;
  13.315 +
  13.316  	return ret;
  13.317  }
  13.318  
  13.319 +char **xenbus_directory(struct xenbus_transaction *t,
  13.320 +			const char *dir, const char *node, unsigned int *num)
  13.321 +{
  13.322 +	char *strings, *path;
  13.323 +	unsigned int len;
  13.324 +
  13.325 +	path = join(dir, node);
  13.326 +	if (IS_ERR(path))
  13.327 +		return (char **)path;
  13.328 +
  13.329 +	strings = xs_single(t, XS_DIRECTORY, path, &len);
  13.330 +	free(path);
  13.331 +	if (IS_ERR(strings))
  13.332 +		return (char **)strings;
  13.333 +
  13.334 +	return split(strings, len, num);
  13.335 +}
  13.336 +
  13.337  /* Check if a path exists. Return 1 if it does. */
  13.338 -int xenbus_exists(const char *dir, const char *node)
  13.339 +int xenbus_exists(struct xenbus_transaction *t,
  13.340 +		  const char *dir, const char *node)
  13.341  {
  13.342  	char **d;
  13.343  	int dir_n;
  13.344  
  13.345 -	d = xenbus_directory(dir, node, &dir_n);
  13.346 +	d = xenbus_directory(t, dir, node, &dir_n);
  13.347  	if (IS_ERR(d))
  13.348  		return 0;
  13.349 -	xfree(d);
  13.350 +	free(d);
  13.351  	return 1;
  13.352  }
  13.353  
  13.354 @@ -243,92 +348,134 @@ int xenbus_exists(const char *dir, const
  13.355   * Returns a kmalloced value: call free() on it after use.
  13.356   * len indicates length in bytes.
  13.357   */
  13.358 -void *xenbus_read(const char *dir, const char *node, unsigned int *len)
  13.359 +void *xenbus_read(struct xenbus_transaction *t,
  13.360 +		  const char *dir, const char *node, unsigned int *len)
  13.361  {
  13.362 -	return xs_single(XS_READ, join(dir, node), len);
  13.363 +	char *path;
  13.364 +	void *ret;
  13.365 +
  13.366 +	path = join(dir, node);
  13.367 +	if (IS_ERR(path))
  13.368 +		return (void *)path;
  13.369 +
  13.370 +	ret = xs_single(t, XS_READ, path, len);
  13.371 +	free(path);
  13.372 +	return ret;
  13.373  }
  13.374  
  13.375  /* Write the value of a single file.
  13.376 - * Returns -err on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
  13.377 + * Returns -err on failure.
  13.378   */
  13.379 -int xenbus_write(const char *dir, const char *node,
  13.380 -		 const char *string, int createflags)
  13.381 +int xenbus_write(struct xenbus_transaction *t,
  13.382 +		 const char *dir, const char *node, const char *string)
  13.383  {
  13.384 -	const char *flags, *path;
  13.385 -	struct kvec iovec[3];
  13.386 +	const char *path;
  13.387 +	struct kvec iovec[2];
  13.388 +	int ret;
  13.389  
  13.390  	path = join(dir, node);
  13.391 -	/* Format: Flags (as string), path, data. */
  13.392 -	if (createflags == 0)
  13.393 -		flags = XS_WRITE_NONE;
  13.394 -	else if (createflags == O_CREAT)
  13.395 -		flags = XS_WRITE_CREATE;
  13.396 -	else if (createflags == (O_CREAT|O_EXCL))
  13.397 -		flags = XS_WRITE_CREATE_EXCL;
  13.398 -	else
  13.399 -		return -EINVAL;
  13.400 +	if (IS_ERR(path))
  13.401 +		return PTR_ERR(path);
  13.402  
  13.403  	iovec[0].iov_base = (void *)path;
  13.404  	iovec[0].iov_len = strlen(path) + 1;
  13.405 -	iovec[1].iov_base = (void *)flags;
  13.406 -	iovec[1].iov_len = strlen(flags) + 1;
  13.407 -	iovec[2].iov_base = (void *)string;
  13.408 -	iovec[2].iov_len = strlen(string);
  13.409 +	iovec[1].iov_base = (void *)string;
  13.410 +	iovec[1].iov_len = strlen(string);
  13.411  
  13.412 -	return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
  13.413 +	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
  13.414 +	free(path);
  13.415 +	return ret;
  13.416  }
  13.417  
  13.418  /* Create a new directory. */
  13.419 -int xenbus_mkdir(const char *dir, const char *node)
  13.420 +int xenbus_mkdir(struct xenbus_transaction *t,
  13.421 +		 const char *dir, const char *node)
  13.422  {
  13.423 -	return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
  13.424 +	char *path;
  13.425 +	int ret;
  13.426 +
  13.427 +	path = join(dir, node);
  13.428 +	if (IS_ERR(path))
  13.429 +		return PTR_ERR(path);
  13.430 +
  13.431 +	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
  13.432 +	free(path);
  13.433 +	return ret;
  13.434  }
  13.435  
  13.436  /* Destroy a file or directory (directories must be empty). */
  13.437 -int xenbus_rm(const char *dir, const char *node)
  13.438 +int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
  13.439  {
  13.440 -	return xs_error(xs_single(XS_RM, join(dir, node), NULL));
  13.441 +	char *path;
  13.442 +	int ret;
  13.443 +
  13.444 +	path = join(dir, node);
  13.445 +	if (IS_ERR(path))
  13.446 +		return PTR_ERR(path);
  13.447 +
  13.448 +	ret = xs_error(xs_single(t, XS_RM, path, NULL));
  13.449 +	free(path);
  13.450 +	return ret;
  13.451  }
  13.452  
  13.453  /* Start a transaction: changes by others will not be seen during this
  13.454   * transaction, and changes will not be visible to others until end.
  13.455 - * Transaction only applies to the given subtree.
  13.456 - * You can only have one transaction at any time.
  13.457   */
  13.458 -int xenbus_transaction_start(const char *subtree)
  13.459 +struct xenbus_transaction *xenbus_transaction_start(void)
  13.460  {
  13.461 -	return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL));
  13.462 +	char *id_str;
  13.463 +	unsigned long id;
  13.464 +
  13.465 +	down_read(&xs_state.suspend_mutex);
  13.466 +
  13.467 +	id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL);
  13.468 +	if (IS_ERR(id_str)) {
  13.469 +		up_read(&xs_state.suspend_mutex);
  13.470 +		return (struct xenbus_transaction *)id_str;
  13.471 +	}
  13.472 +
  13.473 +	id = simple_strtoul(id_str, NULL, 0);
  13.474 +	free(id_str);
  13.475 +
  13.476 +	return (struct xenbus_transaction *)id;
  13.477  }
  13.478  
  13.479  /* End a transaction.
  13.480   * If abandon is true, transaction is discarded instead of committed.
  13.481   */
  13.482 -int xenbus_transaction_end(int abort)
  13.483 +int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
  13.484  {
  13.485  	char abortstr[2];
  13.486 +	int err;
  13.487  
  13.488  	if (abort)
  13.489  		strcpy(abortstr, "F");
  13.490  	else
  13.491  		strcpy(abortstr, "T");
  13.492 -	return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
  13.493 +
  13.494 +	err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
  13.495 +
  13.496 +	up_read(&xs_state.suspend_mutex);
  13.497 +
  13.498 +	return err;
  13.499  }
  13.500  
  13.501  /* Single read and scanf: returns -errno or num scanned. */
  13.502 -int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
  13.503 +int xenbus_scanf(struct xenbus_transaction *t,
  13.504 +		 const char *dir, const char *node, const char *fmt, ...)
  13.505  {
  13.506  	va_list ap;
  13.507  	int ret;
  13.508  	char *val;
  13.509  
  13.510 -	val = xenbus_read(dir, node, NULL);
  13.511 +	val = xenbus_read(t, dir, node, NULL);
  13.512  	if (IS_ERR(val))
  13.513  		return PTR_ERR(val);
  13.514  
  13.515  	va_start(ap, fmt);
  13.516  	ret = vsscanf(val, fmt, ap);
  13.517  	va_end(ap);
  13.518 -	xfree(val);
  13.519 +	free(val);
  13.520  	/* Distinctive errno. */
  13.521  	if (ret == 0)
  13.522  		return -ERANGE;
  13.523 @@ -336,23 +483,32 @@ int xenbus_scanf(const char *dir, const 
  13.524  }
  13.525  
  13.526  /* Single printf and write: returns -errno or 0. */
  13.527 -int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
  13.528 +int xenbus_printf(struct xenbus_transaction *t,
  13.529 +		  const char *dir, const char *node, const char *fmt, ...)
  13.530  {
  13.531  	va_list ap;
  13.532  	int ret;
  13.533 +#define PRINTF_BUFFER_SIZE 4096
  13.534 +	char *printf_buffer;
  13.535  
  13.536 -	//BUG_ON(down_trylock(&xenbus_lock) == 0);
  13.537 +	printf_buffer = malloc(PRINTF_BUFFER_SIZE);
  13.538 +	if (printf_buffer == NULL)
  13.539 +		return -ENOMEM;
  13.540 +
  13.541  	va_start(ap, fmt);
  13.542 -	ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
  13.543 +	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
  13.544  	va_end(ap);
  13.545  
  13.546 -	//BUG_ON(ret > sizeof(printf_buffer)-1);
  13.547 -	return xenbus_write(dir, node, printf_buffer, O_CREAT);
  13.548 +	//	BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
  13.549 +	ret = xenbus_write(t, dir, node, printf_buffer);
  13.550 +
  13.551 +	free(printf_buffer);
  13.552 +
  13.553 +	return ret;
  13.554  }
  13.555  
  13.556 -	
  13.557  /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
  13.558 -int xenbus_gather(const char *dir, ...)
  13.559 +int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
  13.560  {
  13.561  	va_list ap;
  13.562  	const char *name;
  13.563 @@ -364,7 +520,7 @@ int xenbus_gather(const char *dir, ...)
  13.564  		void *result = va_arg(ap, void *);
  13.565  		char *p;
  13.566  
  13.567 -		p = xenbus_read(dir, name, NULL);
  13.568 +		p = xenbus_read(t, dir, name, NULL);
  13.569  		if (IS_ERR(p)) {
  13.570  			ret = PTR_ERR(p);
  13.571  			break;
  13.572 @@ -372,7 +528,7 @@ int xenbus_gather(const char *dir, ...)
  13.573  		if (fmt) {
  13.574  			if (sscanf(p, fmt, result) == 0)
  13.575  				ret = -EINVAL;
  13.576 -			xfree(p);
  13.577 +			free(p);
  13.578  		} else
  13.579  			*(char **)result = p;
  13.580  	}
  13.581 @@ -389,31 +545,8 @@ static int xs_watch(const char *path, co
  13.582  	iov[1].iov_base = (void *)token;
  13.583  	iov[1].iov_len = strlen(token) + 1;
  13.584  
  13.585 -	return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
  13.586 -}
  13.587 -
  13.588 -static char *xs_read_watch(char **token)
  13.589 -{
  13.590 -	enum xsd_sockmsg_type type;
  13.591 -	char *ret;
  13.592 -
  13.593 -	ret = read_reply(&type, NULL);
  13.594 -	if (IS_ERR(ret))
  13.595 -		return ret;
  13.596 -
  13.597 -	//BUG_ON(type != XS_WATCH_EVENT);
  13.598 -	*token = ret + strlen(ret) + 1;
  13.599 -	return ret;
  13.600 -}
  13.601 -
  13.602 -static int xs_acknowledge_watch(const char *token)
  13.603 -{
  13.604 -#if 0
  13.605 -	return xs_error(xs_single(XS_WATCH_ACK, token, NULL));
  13.606 -#else
  13.607 -	/* XS_WATCH_ACK is no longer available */
  13.608 -	return 0;
  13.609 -#endif
  13.610 +	return xs_error(xs_talkv(NULL, XS_WATCH, iov,
  13.611 +				 ARRAY_SIZE(iov), NULL));
  13.612  }
  13.613  
  13.614  static int xs_unwatch(const char *path, const char *token)
  13.615 @@ -425,10 +558,10 @@ static int xs_unwatch(const char *path, 
  13.616  	iov[1].iov_base = (char *)token;
  13.617  	iov[1].iov_len = strlen(token) + 1;
  13.618  
  13.619 -	return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL));
  13.620 +	return xs_error(xs_talkv(NULL, XS_UNWATCH, iov,
  13.621 +				 ARRAY_SIZE(iov), NULL));
  13.622  }
  13.623  
  13.624 -/* A little paranoia: we don't just trust token. */
  13.625  static struct xenbus_watch *find_watch(const char *token)
  13.626  {
  13.627  	struct xenbus_watch *i, *cmp;
  13.628 @@ -438,6 +571,7 @@ static struct xenbus_watch *find_watch(c
  13.629  	list_for_each_entry(i, &watches, list)
  13.630  		if (i == cmp)
  13.631  			return i;
  13.632 +
  13.633  	return NULL;
  13.634  }
  13.635  
  13.636 @@ -449,111 +583,214 @@ int register_xenbus_watch(struct xenbus_
  13.637  	int err;
  13.638  
  13.639  	sprintf(token, "%lX", (long)watch);
  13.640 -	//BUG_ON(find_watch(token));
  13.641 -printk("Registered watch for: %s\n", token);
  13.642 +
  13.643 +	down_read(&xs_state.suspend_mutex);
  13.644 +
  13.645 +	spin_lock(&watches_lock);
  13.646 +	//	BUG_ON(find_watch(token));
  13.647 +	list_add(&watch->list, &watches);
  13.648 +	spin_unlock(&watches_lock);
  13.649 +
  13.650  	err = xs_watch(watch->node, token);
  13.651 -	if (!err)
  13.652 -		list_add(&watch->list, &watches);
  13.653 +
  13.654 +	/* Ignore errors due to multiple registration. */
  13.655 +	if ((err != 0) && (err != -EEXIST)) {
  13.656 +		spin_lock(&watches_lock);
  13.657 +		list_del(&watch->list);
  13.658 +		spin_unlock(&watches_lock);
  13.659 +	}
  13.660 +
  13.661 +	up_read(&xs_state.suspend_mutex);
  13.662 +
  13.663  	return err;
  13.664  }
  13.665  
  13.666  void unregister_xenbus_watch(struct xenbus_watch *watch)
  13.667  {
  13.668 +	struct xs_stored_msg *msg, *tmp;
  13.669  	char token[sizeof(watch) * 2 + 1];
  13.670  	int err;
  13.671  
  13.672  	sprintf(token, "%lX", (long)watch);
  13.673 -	//BUG_ON(!find_watch(token));
  13.674 +
  13.675 +	down_read(&xs_state.suspend_mutex);
  13.676 +
  13.677 +	spin_lock(&watches_lock);
  13.678 +	//	BUG_ON(!find_watch(token));
  13.679 +	list_del(&watch->list);
  13.680 +	spin_unlock(&watches_lock);
  13.681  
  13.682  	err = xs_unwatch(watch->node, token);
  13.683 -	list_del(&watch->list);
  13.684 -
  13.685  	if (err)
  13.686  		printk("XENBUS Failed to release watch %s: %i\n",
  13.687  		       watch->node, err);
  13.688 +
  13.689 +	up_read(&xs_state.suspend_mutex);
  13.690 +
  13.691 +	/* Cancel pending watch events. */
  13.692 +	spin_lock(&watch_events_lock);
  13.693 +	list_for_each_entry_safe(msg, tmp, &watch_events, list) {
  13.694 +		if (msg->u.watch.handle != watch)
  13.695 +			continue;
  13.696 +		list_del(&msg->list);
  13.697 +		free(msg->u.watch.vec);
  13.698 +		free(msg);
  13.699 +	}
  13.700 +	spin_unlock(&watch_events_lock);
  13.701  }
  13.702  
  13.703 -/* Re-register callbacks to all watches. */
  13.704 -void reregister_xenbus_watches(void)
  13.705 +void xs_suspend(void)
  13.706 +{
  13.707 +	down_write(&xs_state.suspend_mutex);
  13.708 +	down(&xs_state.request_mutex);
  13.709 +}
  13.710 +
  13.711 +void xs_resume(void)
  13.712  {
  13.713  	struct xenbus_watch *watch;
  13.714  	char token[sizeof(watch) * 2 + 1];
  13.715  
  13.716 +	up(&xs_state.request_mutex);
  13.717 +
  13.718 +	/* No need for watches_lock: the suspend_mutex is sufficient. */
  13.719  	list_for_each_entry(watch, &watches, list) {
  13.720  		sprintf(token, "%lX", (long)watch);
  13.721  		xs_watch(watch->node, token);
  13.722  	}
  13.723 +
  13.724 +	up_write(&xs_state.suspend_mutex);
  13.725  }
  13.726  
  13.727 -void watch_thread(void *unused)
  13.728 +static void xenwatch_thread(void *unused)
  13.729  {
  13.730 +	struct list_head *ent;
  13.731 +	struct xs_stored_msg *msg;
  13.732 +
  13.733  	for (;;) {
  13.734 -		char *token;
  13.735 -		char *node = NULL;
  13.736 +		wait_event(watch_events_waitq,
  13.737 +			   !list_empty(&watch_events));
  13.738  
  13.739 -		wait_event(xb_waitq, xs_input_avail());
  13.740 +		down(&xenwatch_mutex);
  13.741  
  13.742 -		/* If this is a spurious wakeup caused by someone
  13.743 -		 * doing an op, they'll hold the lock and the buffer
  13.744 -		 * will be empty by the time we get there.		 
  13.745 -		 */
  13.746 -		down(&xenbus_lock);
  13.747 -		if (xs_input_avail())
  13.748 -			node = xs_read_watch(&token);
  13.749 +		spin_lock(&watch_events_lock);
  13.750 +		ent = watch_events.next;
  13.751 +		if (ent != &watch_events)
  13.752 +			list_del(ent);
  13.753 +		spin_unlock(&watch_events_lock);
  13.754  
  13.755 -		if (node && !IS_ERR(node)) {
  13.756 -			struct xenbus_watch *w;
  13.757 -			int err;
  13.758 +		if (ent != &watch_events) {
  13.759 +			msg = list_entry(ent, struct xs_stored_msg, list);
  13.760 +			msg->u.watch.handle->callback(
  13.761 +				msg->u.watch.handle,
  13.762 +				(const char **)msg->u.watch.vec,
  13.763 +				msg->u.watch.vec_size);
  13.764 +			free(msg->u.watch.vec);
  13.765 +			free(msg);
  13.766 +		}
  13.767  
  13.768 -			err = xs_acknowledge_watch(token);
  13.769 -			if (err)
  13.770 -				printk("XENBUS ack %s fail %i\n", node, err);
  13.771 -			w = find_watch(token);
  13.772 -			//BUG_ON(!w);
  13.773 -			w->callback(w, node);
  13.774 -			xfree(node);
  13.775 -		} else
  13.776 -			printk("XENBUS xs_read_watch: %li\n", PTR_ERR(node));
  13.777 -		up(&xenbus_lock);
  13.778 +		up(&xenwatch_mutex);
  13.779  	}
  13.780  }
  13.781  
  13.782 -
  13.783 -static void ballon_changed(struct xenbus_watch *watch, const char *node)
  13.784 +static int process_msg(void)
  13.785  {
  13.786 -    unsigned long new_target;
  13.787 -    int err;
  13.788 -    err = xenbus_scanf("memory", "target", "%lu", &new_target);
  13.789 +	struct xs_stored_msg *msg;
  13.790 +	char *body;
  13.791 +	int err;
  13.792 +
  13.793 +	msg = malloc(sizeof(*msg));
  13.794 +	if (msg == NULL)
  13.795 +		return -ENOMEM;
  13.796 +
  13.797 +	err = xb_read(&msg->hdr, sizeof(msg->hdr));
  13.798 +	if (err) {
  13.799 +		free(msg);
  13.800 +		return err;
  13.801 +	}
  13.802 +
  13.803 +	body = malloc(msg->hdr.len + 1);
  13.804 +	if (body == NULL) {
  13.805 +		free(msg);
  13.806 +		return -ENOMEM;
  13.807 +	}
  13.808 +
  13.809 +	err = xb_read(body, msg->hdr.len);
  13.810 +	if (err) {
  13.811 +		free(body);
  13.812 +		free(msg);
  13.813 +		return err;
  13.814 +	}
  13.815 +	body[msg->hdr.len] = '\0';
  13.816  
  13.817 -    if(err != 1)
  13.818 -    {
  13.819 -        printk("Unable to read memory/target\n");
  13.820 -        return;
  13.821 -    }
  13.822 +	if (msg->hdr.type == XS_WATCH_EVENT) {
  13.823 +		msg->u.watch.vec = split(body, msg->hdr.len,
  13.824 +					 &msg->u.watch.vec_size);
  13.825 +		if (IS_ERR(msg->u.watch.vec)) {
  13.826 +			free(msg);
  13.827 +			return PTR_ERR(msg->u.watch.vec);
  13.828 +		}
  13.829  
  13.830 -    printk("Memory target changed to: %ld bytes, ignoring.\n", new_target);
  13.831 +		spin_lock(&watches_lock);
  13.832 +		msg->u.watch.handle = find_watch(
  13.833 +			msg->u.watch.vec[XS_WATCH_TOKEN]);
  13.834 +		if (msg->u.watch.handle != NULL) {
  13.835 +			spin_lock(&watch_events_lock);
  13.836 +			list_add_tail(&msg->list, &watch_events);
  13.837 +			wake_up(&watch_events_waitq);
  13.838 +			spin_unlock(&watch_events_lock);
  13.839 +		} else {
  13.840 +			free(msg->u.watch.vec);
  13.841 +			free(msg);
  13.842 +		}
  13.843 +		spin_unlock(&watches_lock);
  13.844 +	} else {
  13.845 +		msg->u.reply.body = body;
  13.846 +		spin_lock(&xs_state.reply_lock);
  13.847 +		list_add_tail(&msg->list, &xs_state.reply_list);
  13.848 +		spin_unlock(&xs_state.reply_lock);
  13.849 +		wake_up(&xs_state.reply_waitq);
  13.850 +	}
  13.851 +
  13.852 +	return 0;
  13.853  }
  13.854  
  13.855 +static void xenbus_thread(void *unused)
  13.856 +{
  13.857 +	int err;
  13.858  
  13.859 -static struct xenbus_watch ballon_watch = {
  13.860 -    .node = "memory/target",
  13.861 -    .callback = ballon_changed,
  13.862 -};
  13.863 -
  13.864 -
  13.865 +	for (;;) {
  13.866 +		err = process_msg();
  13.867 +		if (err)
  13.868 +			printk("XENBUS error %d while reading "
  13.869 +			       "message\n", err);
  13.870 +	}
  13.871 +}
  13.872  
  13.873  int xs_init(void)
  13.874  {
  13.875  	int err;
  13.876 -	struct thread *watcher;
  13.877 -    printk("xb_init_comms\n");
  13.878 +	struct thread *kxwatcher_thread;
  13.879 +	struct thread *kxenbus_thread;
  13.880 +
  13.881 +	INIT_LIST_HEAD(&xs_state.reply_list);
  13.882 +	spin_lock_init(&xs_state.reply_lock);
  13.883 +	init_waitqueue_head(&xs_state.reply_waitq);
  13.884 +
  13.885 +	init_MUTEX(&xs_state.request_mutex);
  13.886 +	init_rwsem(&xs_state.suspend_mutex);
  13.887 +
  13.888 +	/* Initialize the shared memory rings to talk to xenstored */
  13.889  	err = xb_init_comms();
  13.890  	if (err)
  13.891  		return err;
  13.892 -	
  13.893 -	watcher = create_thread("kxwatch", watch_thread, NULL);
  13.894 -    down(&xenbus_lock);
  13.895 -    register_xenbus_watch(&ballon_watch);
  13.896 -    up(&xenbus_lock);
  13.897 +
  13.898 +	kxwatcher_thread = create_thread("kxwatch", xenwatch_thread, NULL);
  13.899 +	if (IS_ERR(kxwatcher_thread))
  13.900 +		return PTR_ERR(kxwatcher_thread);
  13.901 +
  13.902 +	kxenbus_thread = create_thread("kxenbus", xenbus_thread, NULL);
  13.903 +	if (IS_ERR(kxenbus_thread))
  13.904 +		return PTR_ERR(kxenbus_thread);
  13.905 +
  13.906  	return 0;
  13.907  }
    14.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu Dec 08 15:04:31 2005 +0000
    14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Thu Dec 08 15:04:41 2005 +0000
    14.3 @@ -192,8 +192,8 @@ static int increase_reservation(unsigned
    14.4  		page = balloon_retrieve();
    14.5  		BUG_ON(page == NULL);
    14.6  
    14.7 -		pfn = page - mem_map;
    14.8 -		BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
    14.9 +		pfn = page_to_pfn(page);
   14.10 +		BUG_ON(phys_to_machine_mapping_valid(pfn));
   14.11  
   14.12  		/* Update P->M and M->P tables. */
   14.13  		set_phys_to_machine(pfn, mfn_list[i]);
   14.14 @@ -253,8 +253,8 @@ static int decrease_reservation(unsigned
   14.15  			break;
   14.16  		}
   14.17  
   14.18 -		pfn = page - mem_map;
   14.19 -		mfn_list[i] = phys_to_machine_mapping[pfn];
   14.20 +		pfn = page_to_pfn(page);
   14.21 +		mfn_list[i] = pfn_to_mfn(pfn);
   14.22  
   14.23  		if (!PageHighMem(page)) {
   14.24  			v = phys_to_virt(pfn << PAGE_SHIFT);
   14.25 @@ -444,6 +444,9 @@ static int __init balloon_init(void)
   14.26  
   14.27  	IPRINTK("Initialising balloon driver.\n");
   14.28  
   14.29 +	if (xen_init() < 0)
   14.30 +		return -1;
   14.31 +
   14.32  	current_pages = min(xen_start_info->nr_pages, max_pfn);
   14.33  	target_pages  = current_pages;
   14.34  	balloon_low   = 0;
   14.35 @@ -465,7 +468,7 @@ static int __init balloon_init(void)
   14.36      
   14.37  	/* Initialise the balloon with excess memory space. */
   14.38  	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
   14.39 -		page = &mem_map[pfn];
   14.40 +		page = pfn_to_page(pfn);
   14.41  		if (!PageReserved(page))
   14.42  			balloon_append(page);
   14.43  	}
    15.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h	Thu Dec 08 15:04:31 2005 +0000
    15.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h	Thu Dec 08 15:04:41 2005 +0000
    15.3 @@ -65,6 +65,8 @@
    15.4  extern unsigned long *phys_to_machine_mapping;
    15.5  #define pfn_to_mfn(pfn)	\
    15.6  (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31))
    15.7 +#define	phys_to_machine_mapping_valid(pfn) \
    15.8 +	(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
    15.9  static inline unsigned long mfn_to_pfn(unsigned long mfn)
   15.10  {
   15.11  	unsigned long pfn;
    16.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h	Thu Dec 08 15:04:31 2005 +0000
    16.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h	Thu Dec 08 15:04:41 2005 +0000
    16.3 @@ -355,34 +355,27 @@ HYPERVISOR_multicall(
    16.4  #endif
    16.5      return 1;
    16.6  }
    16.7 +#endif
    16.8  
    16.9  static inline int
   16.10  HYPERVISOR_update_va_mapping(
   16.11      unsigned long va, pte_t new_val, unsigned long flags)
   16.12  {
   16.13 -#if 0
   16.14 -    int ret;
   16.15 -    unsigned long ign1, ign2, ign3;
   16.16 -
   16.17 -    __asm__ __volatile__ (
   16.18 -        TRAP_INSTR
   16.19 -        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
   16.20 -	: "0" (__HYPERVISOR_update_va_mapping), 
   16.21 -          "1" (va), "2" ((new_val).pte_low), "3" (flags)
   16.22 -	: "memory" );
   16.23 -
   16.24 -    if ( unlikely(ret < 0) )
   16.25 -    {
   16.26 -        printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
   16.27 -               va, (new_val).pte_low, flags);
   16.28 -        BUG();
   16.29 -    }
   16.30 -
   16.31 -    return ret;
   16.32 -#endif
   16.33 +    /* no-op */
   16.34      return 1;
   16.35  }
   16.36 -#endif
   16.37 +
   16.38 +static inline int
   16.39 +HYPERVISOR_memory_op(
   16.40 +    unsigned int cmd, void *arg)
   16.41 +{
   16.42 +    int ret;
   16.43 +    __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;"
   16.44 +        : "=r" (ret)
   16.45 +        : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
   16.46 +        : "r14","r15","r2","r8","memory" );
   16.47 +    return ret;
   16.48 +}
   16.49  
   16.50  static inline int
   16.51  HYPERVISOR_event_channel_op(
    17.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h	Thu Dec 08 15:04:31 2005 +0000
    17.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h	Thu Dec 08 15:04:41 2005 +0000
    17.3 @@ -52,4 +52,19 @@ void force_evtchn_callback(void);
    17.4  #define	mfn_to_pfn(x)	(x)
    17.5  #define machine_to_phys_mapping 0
    17.6  
    17.7 +// for drivers/xen/balloon/balloon.c
    17.8 +#ifdef CONFIG_XEN_SCRUB_PAGES
    17.9 +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
   17.10 +#else
   17.11 +#define scrub_pages(_p,_n) ((void)0)
   17.12 +#endif
   17.13 +#define	pte_mfn(_x)	pte_pfn(_x)
   17.14 +#define INVALID_P2M_ENTRY	(~0UL)
   17.15 +#define __pte_ma(_x)	((pte_t) {(_x)})
   17.16 +#define phys_to_machine_mapping_valid(_x)	(1)
   17.17 +#define	kmap_flush_unused()	do {} while (0)
   17.18 +#define set_phys_to_machine(_x,_y)	do {} while (0)
   17.19 +#define xen_machphys_update(_x,_y)	do {} while (0)
   17.20 +#define pfn_pte_ma(_x,_y)	__pte_ma(0)
   17.21 +
   17.22  #endif /* __HYPERVISOR_H__ */
    18.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h	Thu Dec 08 15:04:31 2005 +0000
    18.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h	Thu Dec 08 15:04:41 2005 +0000
    18.3 @@ -67,6 +67,8 @@ void copy_page(void *, void *);
    18.4  extern unsigned long *phys_to_machine_mapping;
    18.5  #define pfn_to_mfn(pfn)	\
    18.6  (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63))
    18.7 +#define	phys_to_machine_mapping_valid(pfn) \
    18.8 +	(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
    18.9  static inline unsigned long mfn_to_pfn(unsigned long mfn)
   18.10  {
   18.11  	unsigned long pfn;
    19.1 --- a/xen/arch/x86/shadow.c	Thu Dec 08 15:04:31 2005 +0000
    19.2 +++ b/xen/arch/x86/shadow.c	Thu Dec 08 15:04:41 2005 +0000
    19.3 @@ -1450,6 +1450,7 @@ static int resync_all(struct domain *d, 
    19.4      int changed;
    19.5      u32 min_max_shadow, min_max_snapshot;
    19.6      int min_shadow, max_shadow, min_snapshot, max_snapshot;
    19.7 +    struct vcpu *v;
    19.8  
    19.9      ASSERT(shadow_lock_is_acquired(d));
   19.10  
   19.11 @@ -1739,6 +1740,9 @@ static int resync_all(struct domain *d, 
   19.12  
   19.13          if ( unlikely(unshadow) )
   19.14          {
   19.15 +            for_each_vcpu(d, v)
   19.16 +                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
   19.17 +                    return need_flush;
   19.18              perfc_incrc(unshadow_l2_count);
   19.19              shadow_unpin(smfn);
   19.20  #if CONFIG_PAGING_LEVELS == 2
    20.1 --- a/xen/arch/x86/shadow32.c	Thu Dec 08 15:04:31 2005 +0000
    20.2 +++ b/xen/arch/x86/shadow32.c	Thu Dec 08 15:04:41 2005 +0000
    20.3 @@ -2326,6 +2326,7 @@ static int resync_all(struct domain *d, 
    20.4      int changed;
    20.5      u32 min_max_shadow, min_max_snapshot;
    20.6      int min_shadow, max_shadow, min_snapshot, max_snapshot;
    20.7 +    struct vcpu *v;
    20.8  
    20.9      ASSERT(shadow_lock_is_acquired(d));
   20.10  
   20.11 @@ -2527,6 +2528,9 @@ static int resync_all(struct domain *d, 
   20.12  
   20.13          if ( unlikely(unshadow) )
   20.14          {
   20.15 +            for_each_vcpu(d, v)
   20.16 +                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
   20.17 +                    return need_flush;
   20.18              perfc_incrc(unshadow_l2_count);
   20.19              shadow_unpin(smfn);
   20.20              if ( unlikely(shadow_mode_external(d)) )
    21.1 --- a/xen/arch/x86/vmx.c	Thu Dec 08 15:04:31 2005 +0000
    21.2 +++ b/xen/arch/x86/vmx.c	Thu Dec 08 15:04:41 2005 +0000
    21.3 @@ -108,7 +108,7 @@ void vmx_relinquish_resources(struct vcp
    21.4      destroy_vmcs(&v->arch.arch_vmx);
    21.5      free_monitor_pagetable(v);
    21.6      vpit = &v->domain->arch.vmx_platform.vmx_pit;
    21.7 -    if ( vpit->ticking && active_ac_timer(&(vpit->pit_timer)) )
    21.8 +    if ( active_ac_timer(&(vpit->pit_timer)) )
    21.9          rem_ac_timer(&vpit->pit_timer);
   21.10      if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) ) {
   21.11          rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
   21.12 @@ -905,7 +905,7 @@ vmx_world_save(struct vcpu *v, struct vm
   21.13  int
   21.14  vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
   21.15  {
   21.16 -    unsigned long mfn, old_cr4;
   21.17 +    unsigned long mfn, old_cr4, old_base_mfn;
   21.18      int error = 0;
   21.19  
   21.20      error |= __vmwrite(GUEST_RIP, c->eip);
   21.21 @@ -945,7 +945,12 @@ vmx_world_restore(struct vcpu *v, struct
   21.22              return 0;
   21.23          }
   21.24          mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
   21.25 +        if(!get_page(pfn_to_page(mfn), v->domain))
   21.26 +                return 0;
   21.27 +        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   21.28          v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
   21.29 +        if (old_base_mfn)
   21.30 +             put_page(pfn_to_page(old_base_mfn));
   21.31          update_pagetables(v);
   21.32          /*
   21.33           * arch.shadow_table should now hold the next CR3 for shadow
   21.34 @@ -1174,9 +1179,11 @@ static int vmx_set_cr0(unsigned long val
   21.35      }
   21.36  
   21.37      if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
   21.38 -        if(v->arch.arch_vmx.cpu_cr3)
   21.39 +        if(v->arch.arch_vmx.cpu_cr3){
   21.40              put_page(pfn_to_page(get_mfn_from_pfn(
   21.41                        v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
   21.42 +            v->arch.guest_table = mk_pagetable(0);
   21.43 +        }
   21.44  
   21.45      /*
   21.46       * VMX does not implement real-mode virtualization. We emulate
    22.1 --- a/xen/arch/x86/vmx_intercept.c	Thu Dec 08 15:04:31 2005 +0000
    22.2 +++ b/xen/arch/x86/vmx_intercept.c	Thu Dec 08 15:04:41 2005 +0000
    22.3 @@ -387,7 +387,6 @@ void vmx_hooks_assist(struct vcpu *v)
    22.4          }
    22.5          else {
    22.6              init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
    22.7 -            vpit->ticking = 1;
    22.8          }
    22.9  
   22.10          /* init count for this channel */
    23.1 --- a/xen/arch/x86/vmx_io.c	Thu Dec 08 15:04:31 2005 +0000
    23.2 +++ b/xen/arch/x86/vmx_io.c	Thu Dec 08 15:04:41 2005 +0000
    23.3 @@ -748,7 +748,7 @@ void vmx_check_events(struct vcpu *v)
    23.4  {
    23.5      /* clear the event *before* checking for work. This should avoid
    23.6         the set-and-check races */
    23.7 -    if (vmx_clear_pending_io_event(current))
    23.8 +    if (vmx_clear_pending_io_event(v))
    23.9          vmx_io_assist(v);
   23.10  }
   23.11  
   23.12 @@ -793,29 +793,39 @@ static __inline__ int find_highest_irq(u
   23.13      return __fls(pintr[0]);
   23.14  }
   23.15  
   23.16 +void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit)
   23.17 +{
   23.18 +    u64   drift;
   23.19 +
   23.20 +    if ( vpit->first_injected )
   23.21 +        drift = vpit->period_cycles * vpit->pending_intr_nr;
   23.22 +    else 
   23.23 +        drift = 0;
   23.24 +    drift = v->arch.arch_vmx.tsc_offset - drift;
   23.25 +    __vmwrite(TSC_OFFSET, drift);
   23.26 +
   23.27 +#if defined (__i386__)
   23.28 +    __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
   23.29 +#endif
   23.30 +}
   23.31 +
   23.32  #define BSP_CPU(v)    (!(v->vcpu_id))
   23.33  static inline void
   23.34  interrupt_post_injection(struct vcpu * v, int vector, int type)
   23.35  {
   23.36      struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
   23.37 -    u64    drift;
   23.38  
   23.39      if ( is_pit_irq(v, vector, type) ) {
   23.40          if ( !vpit->first_injected ) {
   23.41 +            vpit->pending_intr_nr = 0;
   23.42 +            vpit->scheduled = NOW() + vpit->period;
   23.43 +            set_ac_timer(&vpit->pit_timer, vpit->scheduled);
   23.44              vpit->first_injected = 1;
   23.45 -            vpit->pending_intr_nr = 0;
   23.46          } else {
   23.47              vpit->pending_intr_nr--;
   23.48          }
   23.49          vpit->inject_point = NOW();
   23.50 -        drift = vpit->period_cycles * vpit->pending_intr_nr;
   23.51 -        drift = v->arch.arch_vmx.tsc_offset - drift;
   23.52 -        __vmwrite(TSC_OFFSET, drift);
   23.53 -
   23.54 -#if defined (__i386__)
   23.55 -        __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
   23.56 -#endif
   23.57 -
   23.58 +        set_tsc_shift (v, vpit);
   23.59      }
   23.60  
   23.61      switch(type)
   23.62 @@ -982,8 +992,10 @@ void vmx_do_resume(struct vcpu *v)
   23.63              vmx_wait_io();
   23.64      }
   23.65      /* pick up the elapsed PIT ticks and re-enable pit_timer */
   23.66 -    if ( vpit->ticking )
   23.67 +    if ( vpit->first_injected ) {
   23.68          pickup_deactive_ticks(vpit);
   23.69 +    }
   23.70 +    set_tsc_shift(v,vpit);
   23.71  
   23.72      /* We can't resume the guest if we're waiting on I/O */
   23.73      ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
    24.1 --- a/xen/arch/x86/vmx_vmcs.c	Thu Dec 08 15:04:31 2005 +0000
    24.2 +++ b/xen/arch/x86/vmx_vmcs.c	Thu Dec 08 15:04:41 2005 +0000
    24.3 @@ -243,9 +243,6 @@ static void vmx_setup_platform(struct do
    24.4  {
    24.5      struct vmx_platform *platform;
    24.6  
    24.7 -    if (!(VMX_DOMAIN(current) && (current->vcpu_id == 0)))
    24.8 -        return;
    24.9 -
   24.10      vmx_map_io_shared_page(d);
   24.11      vmx_set_vcpu_nr(d);
   24.12  
   24.13 @@ -290,6 +287,7 @@ static void vmx_do_launch(struct vcpu *v
   24.14  /* Update CR3, GDT, LDT, TR */
   24.15      unsigned int  error = 0;
   24.16      unsigned long cr0, cr4;
   24.17 +    u64     host_tsc;
   24.18  
   24.19      if (v->vcpu_id == 0)
   24.20          vmx_setup_platform(v->domain);
   24.21 @@ -337,6 +335,10 @@ static void vmx_do_launch(struct vcpu *v
   24.22      __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
   24.23  
   24.24      v->arch.schedule_tail = arch_vmx_do_resume;
   24.25 +    /* init guest tsc to start from 0 */
   24.26 +    rdtscll(host_tsc);
   24.27 +    v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
   24.28 +    set_tsc_shift (v, &v->domain->arch.vmx_platform.vmx_pit);
   24.29  }
   24.30  
   24.31  /*
   24.32 @@ -366,7 +368,6 @@ static inline int construct_init_vmcs_gu
   24.33      error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
   24.34  
   24.35      /* TSC */
   24.36 -    error |= __vmwrite(TSC_OFFSET, 0);
   24.37      error |= __vmwrite(CR3_TARGET_COUNT, 0);
   24.38  
   24.39      /* Guest Selectors */
    25.1 --- a/xen/include/asm-x86/vmx_vpit.h	Thu Dec 08 15:04:31 2005 +0000
    25.2 +++ b/xen/include/asm-x86/vmx_vpit.h	Thu Dec 08 15:04:41 2005 +0000
    25.3 @@ -27,7 +27,6 @@ struct vmx_virpit {
    25.4      unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
    25.5      u32 period;		/* pit frequency in ns */
    25.6      int first_injected;                 /* flag to prevent shadow window */
    25.7 -    int ticking;    /* indicating it is ticking */
    25.8  
    25.9      /* virtual PIT state for handle related I/O */
   25.10      int read_state;
   25.11 @@ -51,5 +50,6 @@ static __inline__ s_time_t get_pit_sched
   25.12      else
   25.13          return -1;
   25.14  }
   25.15 +extern void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit);
   25.16  
   25.17  #endif /* _VMX_VIRPIT_H_ */