ia64/xen-unstable

changeset 8172:43582de050c6

Merged.
author emellor@leeni.uk.xensource.com
date Fri Dec 02 01:00:01 2005 +0000 (2005-12-02)
parents 36f09499bd8c f62f9b1732b9
children 5d8a5e7187d5
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c	Thu Dec 01 11:15:31 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c	Fri Dec 02 01:00:01 2005 +0000
     1.3 @@ -1752,7 +1752,7 @@ void __init setup_arch(char **cmdline_p)
     1.4  #endif
     1.5  #endif
     1.6  	} else {
     1.7 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
     1.8 +#ifdef CONFIG_XEN_PHYSDEV_ACCESS
     1.9  		extern const struct consw xennull_con;
    1.10  		extern int console_use_vt;
    1.11  #if defined(CONFIG_VGA_CONSOLE)
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Thu Dec 01 11:15:31 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Dec 02 01:00:01 2005 +0000
     2.3 @@ -481,6 +481,7 @@ static void make_response(blkif_t *blkif
     2.4  	blkif_response_t *resp;
     2.5  	unsigned long     flags;
     2.6  	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
     2.7 +	int notify;
     2.8  
     2.9  	/* Place on the response ring for the relevant domain. */ 
    2.10  	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
    2.11 @@ -488,13 +489,23 @@ static void make_response(blkif_t *blkif
    2.12  	resp->id        = id;
    2.13  	resp->operation = op;
    2.14  	resp->status    = st;
    2.15 -	wmb(); /* Ensure other side can see the response fields. */
    2.16  	blk_ring->rsp_prod_pvt++;
    2.17 -	RING_PUSH_RESPONSES(blk_ring);
    2.18 +	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
    2.19  	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
    2.20  
    2.21 -	/* Kick the relevant domain. */
    2.22 -	notify_remote_via_irq(blkif->irq);
    2.23 +	/*
    2.24 +         * Tail check for pending requests. Allows frontend to avoid
    2.25 +         * notifications if requests are already in flight (lower overheads
    2.26 +         * and promotes batching).
    2.27 +         */
    2.28 +	if (!__on_blkdev_list(blkif) &&
    2.29 +	    RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
    2.30 +		add_to_blkdev_list_tail(blkif);
    2.31 +		maybe_trigger_blkio_schedule();
    2.32 +	}
    2.33 +
    2.34 +	if (notify)
    2.35 +		notify_remote_via_irq(blkif->irq);
    2.36  }
    2.37  
    2.38  void blkif_deschedule(blkif_t *blkif)
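
The response path above reduces to a small, reusable pattern. A minimal sketch follows, assuming a ring defined with DEFINE_RING_TYPES(mydev, ...) and the usual notify_remote_via_irq() binding; the mydev names and the lock parameter are placeholders, not part of this changeset.

    /*
     * Sketch only: a generic backend response path using the new
     * RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() macro from io/ring.h.
     */
    static void mydev_make_response(mydev_back_ring_t *ring, spinlock_t *lock,
                                    int irq, uint64_t id, int16_t status)
    {
        mydev_response_t *resp;
        unsigned long flags;
        int notify;

        spin_lock_irqsave(lock, flags);

        resp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
        resp->id     = id;
        resp->status = status;
        ring->rsp_prod_pvt++;

        /* Publishes rsp_prod (with the needed barrier) and samples rsp_event. */
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(ring, notify);

        spin_unlock_irqrestore(lock, flags);

        if (notify)
            notify_remote_via_irq(irq);
    }
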
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Dec 01 11:15:31 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Dec 02 01:00:01 2005 +0000
     3.3 @@ -300,6 +300,10 @@ static void backend_changed(struct xenbu
     3.4  /* ** Connection ** */
     3.5  
     3.6  
     3.7 +/* 
     3.8 +** Invoked when the backend is finally 'ready' (and has produced 
     3.9 +** the details about the physical device - #sectors, size, etc). 
    3.10 +*/
    3.11  static void connect(struct blkfront_info *info)
    3.12  {
    3.13  	unsigned long sectors, sector_size;
    3.14 @@ -324,20 +328,17 @@ static void connect(struct blkfront_info
    3.15  		return;
    3.16  	}
    3.17  	
    3.18 -        info->connected = BLKIF_STATE_CONNECTED;
    3.19          xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
    3.20 -	
    3.21 -	err = xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
    3.22 -	if (err)
    3.23 -		return;
    3.24 +
    3.25 +	(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
    3.26  	
    3.27  	/* Kick pending requests. */
    3.28  	spin_lock_irq(&blkif_io_lock);
    3.29 +	info->connected = BLKIF_STATE_CONNECTED;
    3.30  	kick_pending_request_queues(info);
    3.31  	spin_unlock_irq(&blkif_io_lock);
    3.32  }
    3.33  
    3.34 -
    3.35  /**
    3.36   * Handle the change of state of the backend to Closing.  We must delete our
    3.37   * device-layer structures now, to ensure that writes are flushed through to
    3.38 @@ -394,8 +395,17 @@ static inline void ADD_ID_TO_FREELIST(
    3.39  
    3.40  static inline void flush_requests(struct blkfront_info *info)
    3.41  {
    3.42 +	RING_IDX old_prod = info->ring.sring->req_prod;
    3.43 +
    3.44  	RING_PUSH_REQUESTS(&info->ring);
    3.45 -	notify_remote_via_irq(info->irq);
    3.46 +
    3.47 +	/*
    3.48 +         * Send new requests /then/ check if any old requests are still in
    3.49 +         * flight. If so then there is no need to send a notification.
    3.50 +         */
    3.51 +	mb();
    3.52 +	if (info->ring.sring->rsp_prod == old_prod)
    3.53 +		notify_remote_via_irq(info->irq);
    3.54  }
    3.55  
    3.56  static void kick_pending_request_queues(struct blkfront_info *info)
    3.57 @@ -631,6 +641,7 @@ static irqreturn_t blkif_int(int irq, vo
    3.58  		return IRQ_HANDLED;
    3.59  	}
    3.60  
    3.61 + again:
    3.62  	rp = info->ring.sring->rsp_prod;
    3.63  	rmb(); /* Ensure we see queued responses up to 'rp'. */
    3.64  
    3.65 @@ -666,6 +677,15 @@ static irqreturn_t blkif_int(int irq, vo
    3.66  
    3.67  	info->ring.rsp_cons = i;
    3.68  
    3.69 +	if (i != info->ring.req_prod_pvt) {
    3.70 +		int more_to_do;
    3.71 +		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
    3.72 +		if (more_to_do)
    3.73 +			goto again;
    3.74 +	} else {
    3.75 +		info->ring.sring->rsp_event = i + 1;
    3.76 +	}
    3.77 +
    3.78  	kick_pending_request_queues(info);
    3.79  
    3.80  	spin_unlock_irqrestore(&blkif_io_lock, flags);
    3.81 @@ -751,14 +771,20 @@ static void blkif_recover(struct blkfron
    3.82  
    3.83  	kfree(copy);
    3.84  
    3.85 -	/* info->ring->req_prod will be set when we flush_requests().*/
    3.86 -	wmb();
    3.87 +	(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
    3.88 +	
    3.89 +	/* Now safe for us to use the shared ring */
    3.90 +	spin_lock_irq(&blkif_io_lock);
    3.91 +        info->connected = BLKIF_STATE_CONNECTED;
    3.92 +	spin_unlock_irq(&blkif_io_lock);
    3.93  
    3.94 -	/* Kicks things back into life. */
    3.95 +	/* Send off requeued requests */
    3.96  	flush_requests(info);
    3.97  
    3.98 -	/* Now safe to let other people use the interface. */
    3.99 -	info->connected = BLKIF_STATE_CONNECTED;
   3.100 +	/* Kick any other new requests queued since we resumed */
   3.101 +	spin_lock_irq(&blkif_io_lock);
   3.102 +	kick_pending_request_queues(info);
   3.103 +	spin_unlock_irq(&blkif_io_lock);
   3.104  }
   3.105  
   3.106  
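
The frontend side of the protocol has two halves: push requests and notify only if nothing was left in flight (flush_requests above), and drain responses with a final re-check so that a response arriving between the drain loop and the rsp_event update is not missed. A minimal sketch of the drain half, assuming a ring defined with DEFINE_RING_TYPES(mydev, ...); mydev_handle_response() is a placeholder.

    static void mydev_drain_responses(mydev_front_ring_t *ring)
    {
        RING_IDX cons, prod;
        int more_to_do;

     again:
        prod = ring->sring->rsp_prod;
        rmb(); /* see the response payloads before acting on rsp_prod */

        for (cons = ring->rsp_cons; cons != prod; cons++)
            mydev_handle_response(RING_GET_RESPONSE(ring, cons)); /* placeholder */

        ring->rsp_cons = cons;

        /*
         * Arms rsp_event (rsp_cons + 1) so the backend will notify us about
         * the next response, then re-checks once: a response that raced in
         * after the loop above is picked up here instead of being lost.
         */
        RING_FINAL_CHECK_FOR_RESPONSES(ring, more_to_do);
        if (more_to_do)
            goto again;
    }
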
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu Dec 01 11:15:31 2005 +0000
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Fri Dec 02 01:00:01 2005 +0000
     4.3 @@ -375,7 +375,7 @@ static int blktap_ioctl(struct inode *in
     4.4  static unsigned int blktap_poll(struct file *file, poll_table *wait)
     4.5  {
     4.6  	poll_wait(file, &blktap_wait, wait);
     4.7 -	if (RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)) {
     4.8 +	if (blktap_ufe_ring.req_prod_pvt != blktap_ufe_ring.sring->req_prod) {
     4.9  		flush_tlb_all();
    4.10  		RING_PUSH_REQUESTS(&blktap_ufe_ring);
    4.11  		return POLLIN | POLLRDNORM;
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Thu Dec 01 11:15:31 2005 +0000
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Fri Dec 02 01:00:01 2005 +0000
     5.3 @@ -579,7 +579,7 @@ static struct tty_operations xencons_ops
     5.4  	.wait_until_sent = xencons_wait_until_sent,
     5.5  };
     5.6  
     5.7 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
     5.8 +#ifdef CONFIG_XEN_PHYSDEV_ACCESS
     5.9  static const char *xennullcon_startup(void)
    5.10  {
    5.11  	return NULL;
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Thu Dec 01 11:15:31 2005 +0000
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Fri Dec 02 01:00:01 2005 +0000
     6.3 @@ -50,9 +50,9 @@
     6.4  
     6.5  struct per_user_data {
     6.6  	/* Notification ring, accessed via /dev/xen/evtchn. */
     6.7 -#define EVTCHN_RING_SIZE     2048  /* 2048 16-bit entries */
     6.8 +#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
     6.9  #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
    6.10 -	u16 *ring;
    6.11 +	evtchn_port_t *ring;
    6.12  	unsigned int ring_cons, ring_prod, ring_overflow;
    6.13  
    6.14  	/* Processes wait on this queue when ring is empty. */
    6.15 @@ -75,7 +75,7 @@ void evtchn_device_upcall(int port)
    6.16  
    6.17  	if ((u = port_user[port]) != NULL) {
    6.18  		if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
    6.19 -			u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
    6.20 +			u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
    6.21  			if (u->ring_cons == u->ring_prod++) {
    6.22  				wake_up_interruptible(&u->evtchn_wait);
    6.23  				kill_fasync(&u->evtchn_async_queue,
    6.24 @@ -94,52 +94,40 @@ static ssize_t evtchn_read(struct file *
    6.25  {
    6.26  	int rc;
    6.27  	unsigned int c, p, bytes1 = 0, bytes2 = 0;
    6.28 -	DECLARE_WAITQUEUE(wait, current);
    6.29  	struct per_user_data *u = file->private_data;
    6.30  
    6.31 -	add_wait_queue(&u->evtchn_wait, &wait);
    6.32 -
    6.33 -	count &= ~1; /* even number of bytes */
    6.34 +	/* Whole number of ports. */
    6.35 +	count &= ~(sizeof(evtchn_port_t)-1);
    6.36  
    6.37 -	if (count == 0) {
    6.38 -		rc = 0;
    6.39 -		goto out;
    6.40 -	}
    6.41 +	if (count == 0)
    6.42 +		return 0;
    6.43  
    6.44  	if (count > PAGE_SIZE)
    6.45  		count = PAGE_SIZE;
    6.46  
    6.47  	for (;;) {
    6.48 -		set_current_state(TASK_INTERRUPTIBLE);
    6.49 +		if (u->ring_overflow)
    6.50 +			return -EFBIG;
    6.51  
    6.52  		if ((c = u->ring_cons) != (p = u->ring_prod))
    6.53  			break;
    6.54  
    6.55 -		if (u->ring_overflow) {
    6.56 -			rc = -EFBIG;
    6.57 -			goto out;
    6.58 -		}
    6.59 +		if (file->f_flags & O_NONBLOCK)
    6.60 +			return -EAGAIN;
    6.61  
    6.62 -		if (file->f_flags & O_NONBLOCK) {
    6.63 -			rc = -EAGAIN;
    6.64 -			goto out;
    6.65 -		}
    6.66 -
    6.67 -		if (signal_pending(current)) {
    6.68 -			rc = -ERESTARTSYS;
    6.69 -			goto out;
    6.70 -		}
    6.71 -
    6.72 -		schedule();
    6.73 +		rc = wait_event_interruptible(
    6.74 +			u->evtchn_wait, u->ring_cons != u->ring_prod);
    6.75 +		if (rc)
    6.76 +			return rc;
    6.77  	}
    6.78  
    6.79  	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
    6.80  	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
    6.81  		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
    6.82 -			sizeof(u16);
    6.83 -		bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
    6.84 +			sizeof(evtchn_port_t);
    6.85 +		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
    6.86  	} else {
    6.87 -		bytes1 = (p - c) * sizeof(u16);
    6.88 +		bytes1 = (p - c) * sizeof(evtchn_port_t);
    6.89  		bytes2 = 0;
    6.90  	}
    6.91  
    6.92 @@ -153,32 +141,26 @@ static ssize_t evtchn_read(struct file *
    6.93  
    6.94  	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
    6.95  	    ((bytes2 != 0) &&
    6.96 -	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) {
    6.97 -		rc = -EFAULT;
    6.98 -		goto out;
    6.99 -	}
   6.100 -
   6.101 -	u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
   6.102 +	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
   6.103 +		return -EFAULT;
   6.104  
   6.105 -	rc = bytes1 + bytes2;
   6.106 +	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
   6.107  
   6.108 - out:
   6.109 -	__set_current_state(TASK_RUNNING);
   6.110 -	remove_wait_queue(&u->evtchn_wait, &wait);
   6.111 -	return rc;
   6.112 +	return bytes1 + bytes2;
   6.113  }
   6.114  
   6.115  static ssize_t evtchn_write(struct file *file, const char __user *buf,
   6.116                              size_t count, loff_t *ppos)
   6.117  {
   6.118  	int  rc, i;
   6.119 -	u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
   6.120 +	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
   6.121  	struct per_user_data *u = file->private_data;
   6.122  
   6.123  	if (kbuf == NULL)
   6.124  		return -ENOMEM;
   6.125  
   6.126 -	count &= ~1; /* even number of bytes */
   6.127 +	/* Whole number of ports. */
   6.128 +	count &= ~(sizeof(evtchn_port_t)-1);
   6.129  
   6.130  	if (count == 0) {
   6.131  		rc = 0;
   6.132 @@ -194,7 +176,7 @@ static ssize_t evtchn_write(struct file 
   6.133  	}
   6.134  
   6.135  	spin_lock_irq(&port_user_lock);
   6.136 -	for (i = 0; i < (count/2); i++)
   6.137 +	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
   6.138  		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
   6.139  			unmask_evtchn(kbuf[i]);
   6.140  	spin_unlock_irq(&port_user_lock);
   6.141 @@ -379,8 +361,8 @@ static int evtchn_open(struct inode *ino
   6.142  	memset(u, 0, sizeof(*u));
   6.143  	init_waitqueue_head(&u->evtchn_wait);
   6.144  
   6.145 -	if ((u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL)
   6.146 -	{
   6.147 +	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
   6.148 +	if (u->ring == NULL) {
   6.149  		kfree(u);
   6.150  		return -ENOMEM;
   6.151  	}
   6.152 @@ -400,8 +382,7 @@ static int evtchn_release(struct inode *
   6.153  
   6.154  	free_page((unsigned long)u->ring);
   6.155  
   6.156 -	for (i = 0; i < NR_EVENT_CHANNELS; i++)
   6.157 -	{
   6.158 +	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
   6.159  		int ret;
   6.160  		if (port_user[i] != u)
   6.161  			continue;
   6.162 @@ -447,10 +428,9 @@ static int __init evtchn_init(void)
   6.163  	spin_lock_init(&port_user_lock);
   6.164  	memset(port_user, 0, sizeof(port_user));
   6.165  
   6.166 -	/* (DEVFS) create '/dev/misc/evtchn'. */
   6.167 +	/* Create '/dev/misc/evtchn'. */
   6.168  	err = misc_register(&evtchn_miscdev);
   6.169 -	if (err != 0)
   6.170 -	{
   6.171 +	if (err != 0) {
   6.172  		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
   6.173  		return err;
   6.174  	}
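
From userspace, the visible effect of this change is that /dev/xen/evtchn now trades in evtchn_port_t (uint32_t) records instead of u16. A minimal consumer sketch, assuming a port has already been bound through the driver's ioctl interface (not shown) and with most error handling omitted:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    typedef uint32_t evtchn_port_t;  /* as in xen/public/event_channel.h */

    int main(void)
    {
        evtchn_port_t port;
        int fd = open("/dev/xen/evtchn", O_RDWR);

        if (fd < 0)
            return 1;

        /* A port is assumed to have been bound via the driver's ioctl already. */

        /* Each read returns one or more whole ports with pending events. */
        if (read(fd, &port, sizeof(port)) == (ssize_t)sizeof(port)) {
            printf("event on port %u\n", (unsigned)port);

            /* Writing the port back unmasks it, re-enabling further events. */
            (void)write(fd, &port, sizeof(port));
        }

        close(fd);
        return 0;
    }
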
     7.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu Dec 01 11:15:31 2005 +0000
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Dec 02 01:00:01 2005 +0000
     7.3 @@ -44,9 +44,6 @@ static mmu_update_t rx_mmu[NET_RX_RING_S
     7.4  static gnttab_transfer_t grant_rx_op[MAX_PENDING_REQS];
     7.5  static unsigned char rx_notify[NR_IRQS];
     7.6  
     7.7 -/* Don't currently gate addition of an interface to the tx scheduling list. */
     7.8 -#define tx_work_exists(_if) (1)
     7.9 -
    7.10  static unsigned long mmap_vstart;
    7.11  #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
    7.12  
    7.13 @@ -377,25 +374,22 @@ static void add_to_net_schedule_list_tai
    7.14   * aggressive in avoiding new-packet notifications -- frontend only needs to
    7.15   * send a notification if there are no outstanding unreceived responses.
    7.16   * If we may be buffer transmit buffers for any reason then we must be rather
    7.17 - * more conservative and advertise that we are 'sleeping' this connection here.
    7.18 + * more conservative and treat this as the final check for pending work.
    7.19   */
    7.20  void netif_schedule_work(netif_t *netif)
    7.21  {
    7.22 -	if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
    7.23 +	int more_to_do;
    7.24 +
    7.25 +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
    7.26 +	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
    7.27 +#else
    7.28 +	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
    7.29 +#endif
    7.30 +
    7.31 +	if (more_to_do) {
    7.32  		add_to_net_schedule_list_tail(netif);
    7.33  		maybe_schedule_tx_action();
    7.34  	}
    7.35 -#ifndef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
    7.36 -	else {
    7.37 -		netif->tx.sring->server_is_sleeping = 1;
    7.38 -		mb();
    7.39 -		if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
    7.40 -			netif->tx.sring->server_is_sleeping = 0;
    7.41 -			add_to_net_schedule_list_tail(netif);
    7.42 -			maybe_schedule_tx_action();
    7.43 -		}
    7.44 -	}
    7.45 -#endif
    7.46  }
    7.47  
    7.48  void netif_deschedule_work(netif_t *netif)
    7.49 @@ -447,26 +441,6 @@ inline static void net_tx_action_dealloc
    7.50          
    7.51  		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
    7.52  
    7.53 -		/*
    7.54 -		 * Scheduling checks must happen after the above response is
    7.55 -		 * posted. This avoids a possible race with a guest OS on
    7.56 -		 * another CPU if that guest is testing against 'resp_prod'
    7.57 -		 * when deciding whether to notify us when it queues additional
    7.58 -                 * packets.
    7.59 -		 */
    7.60 -		mb();
    7.61 -
    7.62 -		if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
    7.63 -			add_to_net_schedule_list_tail(netif);
    7.64 -		} else {
    7.65 -			netif->tx.sring->server_is_sleeping = 1;
    7.66 -			mb();
    7.67 -			if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
    7.68 -				netif->tx.sring->server_is_sleeping = 0;
    7.69 -				add_to_net_schedule_list_tail(netif);
    7.70 -			}
    7.71 -		}
    7.72 -
    7.73  		netif_put(netif);
    7.74  	}
    7.75  }
    7.76 @@ -482,7 +456,7 @@ static void net_tx_action(unsigned long 
    7.77  	RING_IDX i;
    7.78  	gnttab_map_grant_ref_t *mop;
    7.79  	unsigned int data_len;
    7.80 -	int ret;
    7.81 +	int ret, work_to_do;
    7.82  
    7.83  	if (dealloc_cons != dealloc_prod)
    7.84  		net_tx_action_dealloc();
    7.85 @@ -496,8 +470,8 @@ static void net_tx_action(unsigned long 
    7.86  		netif_get(netif);
    7.87  		remove_from_net_schedule_list(netif);
    7.88  
    7.89 -		/* Work to do? */
    7.90 -		if (!RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
    7.91 +		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
    7.92 +		if (!work_to_do) {
    7.93  			netif_put(netif);
    7.94  			continue;
    7.95  		}
    7.96 @@ -695,10 +669,8 @@ static void netif_page_release(struct pa
    7.97  irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
    7.98  {
    7.99  	netif_t *netif = dev_id;
   7.100 -	if (tx_work_exists(netif)) {
   7.101 -		add_to_net_schedule_list_tail(netif);
   7.102 -		maybe_schedule_tx_action();
   7.103 -	}
   7.104 +	add_to_net_schedule_list_tail(netif);
   7.105 +	maybe_schedule_tx_action();
   7.106  	return IRQ_HANDLED;
   7.107  }
   7.108  
   7.109 @@ -708,17 +680,25 @@ static void make_tx_response(netif_t *ne
   7.110  {
   7.111  	RING_IDX i = netif->tx.rsp_prod_pvt;
   7.112  	netif_tx_response_t *resp;
   7.113 +	int notify;
   7.114  
   7.115  	resp = RING_GET_RESPONSE(&netif->tx, i);
   7.116  	resp->id     = id;
   7.117  	resp->status = st;
   7.118 -	wmb();
   7.119 +
   7.120  	netif->tx.rsp_prod_pvt = ++i;
   7.121 -	RING_PUSH_RESPONSES(&netif->tx);
   7.122 +	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
   7.123 +	if (notify)
   7.124 +		notify_remote_via_irq(netif->irq);
   7.125  
   7.126 -	mb(); /* Update producer before checking event threshold. */
   7.127 -	if (i == netif->tx.sring->rsp_event)
   7.128 -		notify_remote_via_irq(netif->irq);
   7.129 +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
   7.130 +	if (i == netif->tx.req_cons) {
   7.131 +		int more_to_do;
   7.132 +		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
   7.133 +		if (more_to_do)
   7.134 +			add_to_net_schedule_list_tail(netif);
   7.135 +	}
   7.136 +#endif
   7.137  }
   7.138  
   7.139  static int make_rx_response(netif_t *netif, 
   7.140 @@ -730,6 +710,7 @@ static int make_rx_response(netif_t *net
   7.141  {
   7.142  	RING_IDX i = netif->rx.rsp_prod_pvt;
   7.143  	netif_rx_response_t *resp;
   7.144 +	int notify;
   7.145  
   7.146  	resp = RING_GET_RESPONSE(&netif->rx, i);
   7.147  	resp->offset     = offset;
   7.148 @@ -738,12 +719,11 @@ static int make_rx_response(netif_t *net
   7.149  	resp->status     = (s16)size;
   7.150  	if (st < 0)
   7.151  		resp->status = (s16)st;
   7.152 -	wmb();
   7.153 +
   7.154  	netif->rx.rsp_prod_pvt = ++i;
   7.155 -	RING_PUSH_RESPONSES(&netif->rx);
   7.156 +	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
   7.157  
   7.158 -	mb(); /* Update producer before checking event threshold. */
   7.159 -	return (i == netif->rx.sring->rsp_event);
   7.160 +	return notify;
   7.161  }
   7.162  
   7.163  static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
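
The discipline described in the comment above netif_schedule_work() generalizes to any backend: before letting a connection go idle, perform the final check, which arms req_event and then re-tests the ring. A sketch with placeholder mydev names:

    static void mydev_maybe_idle(mydev_back_ring_t *ring, struct mydev *dev)
    {
        int more_to_do;

        /*
         * Arms req_event (req_cons + 1) so the frontend will send an event
         * for its next request, then re-checks in case one slipped in
         * meanwhile.
         */
        RING_FINAL_CHECK_FOR_REQUESTS(ring, more_to_do);
        if (more_to_do)
            mydev_schedule_work(dev);   /* placeholder: requeue the interface */
        /* otherwise it is safe to sleep until the next notification */
    }
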
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Dec 01 11:15:31 2005 +0000
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Dec 02 01:00:01 2005 +0000
     8.3 @@ -616,6 +616,7 @@ static int network_start_xmit(struct sk_
     8.4  	RING_IDX i;
     8.5  	grant_ref_t ref;
     8.6  	unsigned long mfn;
     8.7 +	int notify;
     8.8  
     8.9  	if (unlikely(np->tx_full)) {
    8.10  		printk(KERN_ALERT "%s: full queue wasn't stopped!\n",
    8.11 @@ -661,9 +662,10 @@ static int network_start_xmit(struct sk_
    8.12  	tx->size = skb->len;
    8.13  	tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
    8.14  
    8.15 -	wmb(); /* Ensure that backend will see the request. */
    8.16  	np->tx.req_prod_pvt = i + 1;
    8.17 -	RING_PUSH_REQUESTS(&np->tx);
    8.18 +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
    8.19 +	if (notify)
    8.20 +		notify_remote_via_irq(np->irq);
    8.21  
    8.22  	network_tx_buf_gc(dev);
    8.23  
    8.24 @@ -677,13 +679,6 @@ static int network_start_xmit(struct sk_
    8.25  	np->stats.tx_bytes += skb->len;
    8.26  	np->stats.tx_packets++;
    8.27  
    8.28 -	/* Only notify Xen if we really have to. */
    8.29 -	mb();
    8.30 -	if (np->tx.sring->server_is_sleeping) {
    8.31 -		np->tx.sring->server_is_sleeping = 0;
    8.32 -		notify_remote_via_irq(np->irq);
    8.33 -	}
    8.34 -
    8.35  	return 0;
    8.36  
    8.37   drop:
    8.38 @@ -761,7 +756,8 @@ static int netif_poll(struct net_device 
    8.39  					rx->id, rx->status);
    8.40  			RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id =
    8.41  				rx->id;
    8.42 -			wmb();
    8.43 +			RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref =
    8.44 +				ref;
    8.45  			np->rx.req_prod_pvt++;
    8.46  			RING_PUSH_REQUESTS(&np->rx);
    8.47  			work_done--;
    8.48 @@ -882,14 +878,9 @@ static int netif_poll(struct net_device 
    8.49  	if (work_done < budget) {
    8.50  		local_irq_save(flags);
    8.51  
    8.52 -		np->rx.sring->rsp_event = i + 1;
    8.53 -    
    8.54 -		/* Deal with hypervisor racing our resetting of rx_event. */
    8.55 -		mb();
    8.56 -		if (np->rx.sring->rsp_prod == i) {
    8.57 +		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
    8.58 +		if (!more_to_do)
    8.59  			__netif_rx_complete(dev);
    8.60 -			more_to_do = 0;
    8.61 -		}
    8.62  
    8.63  		local_irq_restore(flags);
    8.64  	}
    8.65 @@ -930,7 +921,6 @@ static void network_connect(struct net_d
    8.66  
    8.67  	/* Step 1: Reinitialise variables. */
    8.68  	np->tx_full = 0;
    8.69 -	np->rx.sring->rsp_event = np->tx.sring->rsp_event = 1;
    8.70  
    8.71  	/*
    8.72  	 * Step 2: Rebuild the RX and TX ring contents.
    8.73 @@ -972,7 +962,7 @@ static void network_connect(struct net_d
    8.74  		np->stats.tx_bytes += skb->len;
    8.75  		np->stats.tx_packets++;
    8.76  	}
    8.77 -	wmb();
    8.78 +
    8.79  	np->tx.req_prod_pvt = requeue_idx;
    8.80  	RING_PUSH_REQUESTS(&np->tx);
    8.81  
    8.82 @@ -987,7 +977,7 @@ static void network_connect(struct net_d
    8.83  		RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
    8.84  		requeue_idx++; 
    8.85  	}
    8.86 -	wmb();                
    8.87 +
    8.88  	np->rx.req_prod_pvt = requeue_idx;
    8.89  	RING_PUSH_REQUESTS(&np->rx);
    8.90  
    8.91 @@ -998,7 +988,6 @@ static void network_connect(struct net_d
    8.92  	 * packets.
    8.93  	 */
    8.94  	np->backend_state = BEST_CONNECTED;
    8.95 -	wmb();
    8.96  	notify_remote_via_irq(np->irq);
    8.97  	network_tx_buf_gc(dev);
    8.98  
     9.1 --- a/tools/console/daemon/io.c	Thu Dec 01 11:15:31 2005 +0000
     9.2 +++ b/tools/console/daemon/io.c	Fri Dec 02 01:00:01 2005 +0000
     9.3 @@ -62,7 +62,7 @@ struct domain
     9.4  	struct domain *next;
     9.5  	char *conspath;
     9.6  	int ring_ref;
     9.7 -	int local_port;
     9.8 +	evtchn_port_t local_port;
     9.9  	int evtchn_fd;
    9.10  	struct xencons_interface *interface;
    9.11  };
    9.12 @@ -488,7 +488,7 @@ static void handle_tty_write(struct doma
    9.13  
    9.14  static void handle_ring_read(struct domain *dom)
    9.15  {
    9.16 -	uint16_t v;
    9.17 +	evtchn_port_t v;
    9.18  
    9.19  	if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
    9.20  		return;
    10.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Thu Dec 01 11:15:31 2005 +0000
    10.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Fri Dec 02 01:00:01 2005 +0000
    10.3 @@ -125,7 +125,7 @@ int evtchn_fd = -1;
    10.4  
    10.5  //the evtchn port for polling the notification,
    10.6  //should be inputed as bochs's parameter
    10.7 -uint16_t ioreq_remote_port, ioreq_local_port;
    10.8 +evtchn_port_t ioreq_remote_port, ioreq_local_port;
    10.9  
   10.10  //some functions to handle the io req packet
   10.11  void sp_info()
   10.12 @@ -170,12 +170,12 @@ ioreq_t* __cpu_get_ioreq(void)
   10.13  ioreq_t* cpu_get_ioreq(void)
   10.14  {
   10.15      int rc;
   10.16 -    uint16_t port;
   10.17 +    evtchn_port_t port;
   10.18  
   10.19      rc = read(evtchn_fd, &port, sizeof(port));
   10.20      if ((rc == sizeof(port)) && (port == ioreq_local_port)) {
   10.21          // unmask the wanted port again
   10.22 -        write(evtchn_fd, &ioreq_local_port, 2);
   10.23 +        write(evtchn_fd, &ioreq_local_port, sizeof(port));
   10.24  
   10.25          //get the io packet from shared memory
   10.26          return __cpu_get_ioreq();
    11.1 --- a/tools/ioemu/vl.c	Thu Dec 01 11:15:31 2005 +0000
    11.2 +++ b/tools/ioemu/vl.c	Fri Dec 02 01:00:01 2005 +0000
    11.3 @@ -2907,7 +2907,7 @@ int main(int argc, char **argv)
    11.4                  break;
    11.5              case QEMU_OPTION_p:
    11.6                  {
    11.7 -                    extern uint16_t ioreq_remote_port;
    11.8 +                    extern evtchn_port_t ioreq_remote_port;
    11.9                      ioreq_remote_port = atoi(optarg);
   11.10                      fprintf(logfile, "eport: %d\n", ioreq_remote_port);
   11.11                  }
    12.1 --- a/tools/libxc/xc_evtchn.c	Thu Dec 01 11:15:31 2005 +0000
    12.2 +++ b/tools/libxc/xc_evtchn.c	Fri Dec 02 01:00:01 2005 +0000
    12.3 @@ -51,7 +51,7 @@ int xc_evtchn_alloc_unbound(int xc_handl
    12.4  
    12.5  int xc_evtchn_status(int xc_handle,
    12.6                       uint32_t dom,
    12.7 -                     int port,
    12.8 +                     evtchn_port_t port,
    12.9                       xc_evtchn_status_t *status)
   12.10  {
   12.11      int         rc;
    13.1 --- a/tools/libxc/xc_linux_save.c	Thu Dec 01 11:15:31 2005 +0000
    13.2 +++ b/tools/libxc/xc_linux_save.c	Fri Dec 02 01:00:01 2005 +0000
    13.3 @@ -677,7 +677,7 @@ int xc_linux_save(int xc_handle, int io_
    13.4                               live_shinfo->arch.pfn_to_mfn_frame_list_list);
    13.5  
    13.6      if (!live_p2m_frame_list_list) {
    13.7 -        ERR("Couldn't map p2m_frame_list_list");
    13.8 +        ERR("Couldn't map p2m_frame_list_list (errno %d)", errno);
    13.9          goto out;
   13.10      }
   13.11  
    14.1 --- a/tools/libxc/xenctrl.h	Thu Dec 01 11:15:31 2005 +0000
    14.2 +++ b/tools/libxc/xenctrl.h	Fri Dec 02 01:00:01 2005 +0000
    14.3 @@ -334,7 +334,7 @@ int xc_evtchn_alloc_unbound(int xc_handl
    14.4  
    14.5  int xc_evtchn_status(int xc_handle,
    14.6                       uint32_t dom, /* may be DOMID_SELF */
    14.7 -                     int port,
    14.8 +                     evtchn_port_t port,
    14.9                       xc_evtchn_status_t *status);
   14.10  
   14.11  int xc_physdev_pci_access_modify(int xc_handle,
    15.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Thu Dec 01 11:15:31 2005 +0000
    15.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Fri Dec 02 01:00:01 2005 +0000
    15.3 @@ -40,11 +40,19 @@ def write_exact(fd, buf, errmsg):
    15.4          raise XendError(errmsg)
    15.5  
    15.6  def read_exact(fd, size, errmsg):
    15.7 -    buf = os.read(fd, size)
    15.8 -    if len(buf) != size:
    15.9 -        raise XendError(errmsg)
   15.10 +    buf  = '' 
   15.11 +    while size != 0: 
   15.12 +        str = os.read(fd, size)
   15.13 +        if not len(str):
   15.14 +            log.error("read_exact: EOF trying to read %d (buf='%s')" % \
   15.15 +                      (size, buf))
   15.16 +            raise XendError(errmsg)
   15.17 +        size = size - len(str)
   15.18 +        buf  = buf + str
   15.19      return buf
   15.20  
   15.21 +
   15.22 +
   15.23  def save(fd, dominfo, live):
   15.24      write_exact(fd, SIGNATURE, "could not write guest state file: signature")
   15.25  
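
The read_exact() fix guards against short reads: os.read() may legitimately return fewer bytes than requested, so the caller must loop until the full count has arrived or EOF is hit. The same pattern in C, for reference (a hypothetical helper, not part of this changeset; the EINTR retry is an extra guard not present in the Python version):

    #include <errno.h>
    #include <unistd.h>

    /* Returns 0 once exactly 'size' bytes have been read, -1 on EOF or error. */
    static int read_exact(int fd, void *data, size_t size)
    {
        char *p = data;

        while (size != 0) {
            ssize_t n = read(fd, p, size);

            if (n == 0)                 /* EOF before the full record arrived */
                return -1;
            if (n < 0) {
                if (errno == EINTR)     /* interrupted: just retry */
                    continue;
                return -1;              /* real error */
            }
            p    += n;
            size -= (size_t)n;
        }

        return 0;
    }
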
    16.1 --- a/tools/python/xen/xend/XendDomain.py	Thu Dec 01 11:15:31 2005 +0000
    16.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Dec 02 01:00:01 2005 +0000
    16.3 @@ -397,6 +397,7 @@ class XendDomain:
    16.4              raise XendError("can't connect: %s" % err[1])
    16.5  
    16.6          sock.send("receive\n")
    16.7 +        sock.recv(80) 
    16.8          XendCheckpoint.save(sock.fileno(), dominfo, live)
    16.9  
   16.10  
    17.1 --- a/tools/xenstore/fake_libxc.c	Thu Dec 01 11:15:31 2005 +0000
    17.2 +++ b/tools/xenstore/fake_libxc.c	Fri Dec 02 01:00:01 2005 +0000
    17.3 @@ -34,7 +34,7 @@
    17.4  
    17.5  static int sigfd;
    17.6  static int xs_test_pid;
    17.7 -static uint16_t port;
    17.8 +static evtchn_port_t port;
    17.9  
   17.10  /* The event channel maps to a signal, shared page to an mmapped file. */
   17.11  void evtchn_notify(int local_port)
    18.1 --- a/tools/xenstore/xenstored_domain.c	Thu Dec 01 11:15:31 2005 +0000
    18.2 +++ b/tools/xenstore/xenstored_domain.c	Fri Dec 02 01:00:01 2005 +0000
    18.3 @@ -41,7 +41,7 @@
    18.4  #include <xen/linux/evtchn.h>
    18.5  
    18.6  static int *xc_handle;
    18.7 -static int virq_port;
    18.8 +static evtchn_port_t virq_port;
    18.9  
   18.10  int eventchn_fd = -1; 
   18.11  
   18.12 @@ -53,11 +53,11 @@ struct domain
   18.13  	unsigned int domid;
   18.14  
   18.15  	/* Event channel port */
   18.16 -	uint16_t port;
   18.17 +	evtchn_port_t port;
   18.18  
   18.19  	/* The remote end of the event channel, used only to validate
   18.20  	   repeated domain introductions. */
   18.21 -	uint16_t remote_port;
   18.22 +	evtchn_port_t remote_port;
   18.23  
   18.24  	/* The mfn associated with the event channel, used only to validate
   18.25  	   repeated domain introductions. */
   18.26 @@ -224,7 +224,7 @@ static void domain_cleanup(void)
   18.27  /* We scan all domains rather than use the information given here. */
   18.28  void handle_event(void)
   18.29  {
   18.30 -	uint16_t port;
   18.31 +	evtchn_port_t port;
   18.32  
   18.33  	if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
   18.34  		barf_perror("Failed to read from event fd");
   18.35 @@ -314,7 +314,7 @@ void do_introduce(struct connection *con
   18.36  	char *vec[3];
   18.37  	unsigned int domid;
   18.38  	unsigned long mfn;
   18.39 -	uint16_t port;
   18.40 +	evtchn_port_t port;
   18.41  
   18.42  	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
   18.43  		send_error(conn, EINVAL);
   18.44 @@ -460,7 +460,8 @@ void restore_existing_connections(void)
   18.45  
   18.46  static int dom0_init(void) 
   18.47  { 
   18.48 -        int rc, fd, port; 
   18.49 +        int rc, fd;
   18.50 +	evtchn_port_t port; 
   18.51          unsigned long mfn; 
   18.52          char str[20]; 
   18.53          struct domain *dom0; 
    19.1 --- a/xen/common/sched_bvt.c	Thu Dec 01 11:15:31 2005 +0000
    19.2 +++ b/xen/common/sched_bvt.c	Fri Dec 02 01:00:01 2005 +0000
    19.3 @@ -67,6 +67,7 @@ struct bvt_cpu_info
    19.4  #define MCU            (s32)MICROSECS(100)    /* Minimum unit */
    19.5  #define MCU_ADVANCE    10                     /* default weight */
    19.6  #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
    19.7 +#define CTX_MIN        (s32)MICROSECS(10)     /* Low limit for ctx_allow */
    19.8  static s32 ctx_allow = (s32)MILLISECS(5);     /* context switch allowance */
    19.9  
   19.10  static inline void __add_to_runqueue_head(struct vcpu *d)
   19.11 @@ -297,7 +298,11 @@ static int bvt_ctl(struct sched_ctl_cmd 
   19.12      if ( cmd->direction == SCHED_INFO_PUT )
   19.13          ctx_allow = params->ctx_allow;
   19.14      else
   19.15 +    {
   19.16 +        if ( ctx_allow < CTX_MIN )
   19.17 +            ctx_allow = CTX_MIN;
   19.18          params->ctx_allow = ctx_allow;
   19.19 +    }
   19.20      
   19.21      return 0;
   19.22  }
    20.1 --- a/xen/include/public/event_channel.h	Thu Dec 01 11:15:31 2005 +0000
    20.2 +++ b/xen/include/public/event_channel.h	Fri Dec 02 01:00:01 2005 +0000
    20.3 @@ -9,6 +9,8 @@
    20.4  #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
    20.5  #define __XEN_PUBLIC_EVENT_CHANNEL_H__
    20.6  
    20.7 +typedef uint32_t evtchn_port_t;
    20.8 +
    20.9  /*
   20.10   * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
   20.11   * accepting interdomain bindings from domain <remote_dom>. A fresh port
   20.12 @@ -20,9 +22,9 @@
   20.13  #define EVTCHNOP_alloc_unbound    6
   20.14  typedef struct evtchn_alloc_unbound {
   20.15      /* IN parameters */
   20.16 -    domid_t  dom, remote_dom;
   20.17 +    domid_t dom, remote_dom;
   20.18      /* OUT parameters */
   20.19 -    uint32_t port;
   20.20 +    evtchn_port_t port;
   20.21  } evtchn_alloc_unbound_t;
   20.22  
   20.23  /*
   20.24 @@ -37,10 +39,10 @@ typedef struct evtchn_alloc_unbound {
   20.25  #define EVTCHNOP_bind_interdomain 0
   20.26  typedef struct evtchn_bind_interdomain {
   20.27      /* IN parameters. */
   20.28 -    domid_t  remote_dom;
   20.29 -    uint32_t remote_port;
   20.30 +    domid_t remote_dom;
   20.31 +    evtchn_port_t remote_port;
   20.32      /* OUT parameters. */
   20.33 -    uint32_t local_port;
   20.34 +    evtchn_port_t local_port;
   20.35  } evtchn_bind_interdomain_t;
   20.36  
   20.37  /*
   20.38 @@ -57,7 +59,7 @@ typedef struct evtchn_bind_virq {
   20.39      uint32_t virq;
   20.40      uint32_t vcpu;
   20.41      /* OUT parameters. */
   20.42 -    uint32_t port;
   20.43 +    evtchn_port_t port;
   20.44  } evtchn_bind_virq_t;
   20.45  
   20.46  /*
   20.47 @@ -73,7 +75,7 @@ typedef struct evtchn_bind_pirq {
   20.48  #define BIND_PIRQ__WILL_SHARE 1
   20.49      uint32_t flags; /* BIND_PIRQ__* */
   20.50      /* OUT parameters. */
   20.51 -    uint32_t port;
   20.52 +    evtchn_port_t port;
   20.53  } evtchn_bind_pirq_t;
   20.54  
   20.55  /*
   20.56 @@ -86,7 +88,7 @@ typedef struct evtchn_bind_pirq {
   20.57  typedef struct evtchn_bind_ipi {
   20.58      uint32_t vcpu;
   20.59      /* OUT parameters. */
   20.60 -    uint32_t port;
   20.61 +    evtchn_port_t port;
   20.62  } evtchn_bind_ipi_t;
   20.63  
   20.64  /*
   20.65 @@ -97,7 +99,7 @@ typedef struct evtchn_bind_ipi {
   20.66  #define EVTCHNOP_close            3
   20.67  typedef struct evtchn_close {
   20.68      /* IN parameters. */
   20.69 -    uint32_t port;
   20.70 +    evtchn_port_t port;
   20.71  } evtchn_close_t;
   20.72  
   20.73  /*
   20.74 @@ -107,7 +109,7 @@ typedef struct evtchn_close {
   20.75  #define EVTCHNOP_send             4
   20.76  typedef struct evtchn_send {
   20.77      /* IN parameters. */
   20.78 -    uint32_t port;
   20.79 +    evtchn_port_t port;
   20.80  } evtchn_send_t;
   20.81  
   20.82  /*
   20.83 @@ -122,7 +124,7 @@ typedef struct evtchn_send {
   20.84  typedef struct evtchn_status {
   20.85      /* IN parameters */
   20.86      domid_t  dom;
   20.87 -    uint32_t port;
   20.88 +    evtchn_port_t port;
   20.89      /* OUT parameters */
   20.90  #define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
   20.91  #define EVTCHNSTAT_unbound      1  /* Channel is waiting interdom connection.*/
   20.92 @@ -134,11 +136,11 @@ typedef struct evtchn_status {
   20.93      uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
   20.94      union {
   20.95          struct {
   20.96 -            domid_t  dom;
   20.97 +            domid_t dom;
   20.98          } unbound; /* EVTCHNSTAT_unbound */
   20.99          struct {
  20.100 -            domid_t  dom;
  20.101 -            uint32_t port;
  20.102 +            domid_t dom;
  20.103 +            evtchn_port_t port;
  20.104          } interdomain; /* EVTCHNSTAT_interdomain */
  20.105          uint32_t pirq;      /* EVTCHNSTAT_pirq        */
  20.106          uint32_t virq;      /* EVTCHNSTAT_virq        */
  20.107 @@ -158,7 +160,7 @@ typedef struct evtchn_status {
  20.108  #define EVTCHNOP_bind_vcpu        8
  20.109  typedef struct evtchn_bind_vcpu {
  20.110      /* IN parameters. */
  20.111 -    uint32_t port;
  20.112 +    evtchn_port_t port;
  20.113      uint32_t vcpu;
  20.114  } evtchn_bind_vcpu_t;
  20.115  
    21.1 --- a/xen/include/public/io/blkif.h	Thu Dec 01 11:15:31 2005 +0000
    21.2 +++ b/xen/include/public/io/blkif.h	Fri Dec 02 01:00:01 2005 +0000
    21.3 @@ -11,6 +11,19 @@
    21.4  
    21.5  #include "ring.h"
    21.6  
    21.7 +/*
    21.8 + * Front->back notifications: When enqueuing a new request, there is no
    21.9 + * need to send a notification if there are old requests still in flight
   21.10 + * (that is, old_req_prod != sring->rsp_prod). The backend guarantees to check
   21.11 + * for new requests after queuing the response for the last in-flight request.
   21.12 + * (NB. The generic req_event mechanism is not used for blk requests.)
   21.13 + * 
   21.14 + * Back->front notifications: When enqueuing a new response, sending a
   21.15 + * notification can be made conditional on rsp_event (i.e., the generic
   21.16 + * hold-off mechanism provided by the ring macros). Frontends must set
   21.17 + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
   21.18 + */
   21.19 +
   21.20  #ifndef blkif_vdev_t
   21.21  #define blkif_vdev_t   uint16_t
   21.22  #endif
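
In code, the front->back rule above is the one blkfront's flush_requests() now follows. A minimal sketch, assuming the blkif ring types this header defines via DEFINE_RING_TYPES() and a notify_remote_via_irq() binding for the ring's event channel:

    static void blkif_push_and_maybe_notify(blkif_front_ring_t *ring, int irq)
    {
        RING_IDX old_req_prod = ring->sring->req_prod;

        RING_PUSH_REQUESTS(ring);

        /*
         * Requests still in flight (old_req_prod != rsp_prod) guarantee that
         * the backend will look at the ring again before idling, so the
         * event-channel kick can be skipped in that case.
         */
        mb();
        if (ring->sring->rsp_prod == old_req_prod)
            notify_remote_via_irq(irq);
    }
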
    22.1 --- a/xen/include/public/io/netif.h	Thu Dec 01 11:15:31 2005 +0000
    22.2 +++ b/xen/include/public/io/netif.h	Fri Dec 02 01:00:01 2005 +0000
    22.3 @@ -11,6 +11,13 @@
    22.4  
    22.5  #include "ring.h"
    22.6  
    22.7 +/*
    22.8 + * Note that there is *never* any need to notify the backend when enqueuing
    22.9 + * receive requests (netif_rx_request_t). Notifications after enqueuing any
   22.10 + * other type of message should be conditional on the appropriate req_event
   22.11 + * or rsp_event field in the shared ring.
   22.12 + */
   22.13 +
   22.14  typedef struct netif_tx_request {
   22.15      grant_ref_t gref;      /* Reference to buffer page */
   22.16      uint16_t offset:15;    /* Offset within buffer page */
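
A frontend applying this rule pushes receive buffers silently and gates transmit notifications on the generic hold-off check. A sketch, assuming ring types named per the DEFINE_RING_TYPES() convention and a single event channel for both rings, as netfront uses:

    static void netfront_push(netif_rx_front_ring_t *rx,
                              netif_tx_front_ring_t *tx, int irq)
    {
        int notify;

        /* Receive buffers: publish only; the backend polls these as needed. */
        RING_PUSH_REQUESTS(rx);

        /* Transmit packets: notify only if the backend asked via req_event. */
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(tx, notify);
        if (notify)
            notify_remote_via_irq(irq);
    }
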
    23.1 --- a/xen/include/public/io/ring.h	Thu Dec 01 11:15:31 2005 +0000
    23.2 +++ b/xen/include/public/io/ring.h	Fri Dec 02 01:00:01 2005 +0000
    23.3 @@ -1,10 +1,10 @@
    23.4 -
    23.5 -
    23.6 -
    23.7 -/*
    23.8 +/******************************************************************************
    23.9 + * ring.h
   23.10 + * 
   23.11   * Shared producer-consumer ring macros.
   23.12 + *
   23.13   * Tim Deegan and Andrew Warfield November 2004.
   23.14 - */ 
   23.15 + */
   23.16  
   23.17  #ifndef __XEN_PUBLIC_IO_RING_H__
   23.18  #define __XEN_PUBLIC_IO_RING_H__
   23.19 @@ -28,32 +28,35 @@ typedef unsigned int RING_IDX;
   23.20      (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
   23.21  
   23.22  /*
   23.23 - *  Macros to make the correct C datatypes for a new kind of ring.
   23.24 + * Macros to make the correct C datatypes for a new kind of ring.
   23.25   * 
   23.26 - *  To make a new ring datatype, you need to have two message structures,
   23.27 - *  let's say request_t, and response_t already defined.
   23.28 + * To make a new ring datatype, you need to have two message structures,
   23.29 + * let's say request_t, and response_t already defined.
   23.30   *
   23.31 - *  In a header where you want the ring datatype declared, you then do:
   23.32 + * In a header where you want the ring datatype declared, you then do:
   23.33   *
   23.34   *     DEFINE_RING_TYPES(mytag, request_t, response_t);
   23.35   *
   23.36 - *  These expand out to give you a set of types, as you can see below.
   23.37 - *  The most important of these are:
   23.38 + * These expand out to give you a set of types, as you can see below.
   23.39 + * The most important of these are:
   23.40   *  
   23.41   *     mytag_sring_t      - The shared ring.
   23.42   *     mytag_front_ring_t - The 'front' half of the ring.
   23.43   *     mytag_back_ring_t  - The 'back' half of the ring.
   23.44   *
   23.45 - *  To initialize a ring in your code you need to know the location and size
   23.46 - *  of the shared memory area (PAGE_SIZE, for instance). To initialise
   23.47 - *  the front half:
   23.48 + * To initialize a ring in your code you need to know the location and size
   23.49 + * of the shared memory area (PAGE_SIZE, for instance). To initialise
   23.50 + * the front half:
   23.51   *
   23.52 - *      mytag_front_ring_t front_ring;
   23.53 + *     mytag_front_ring_t front_ring;
   23.54 + *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
   23.55 + *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
   23.56   *
   23.57 - *      SHARED_RING_INIT((mytag_sring_t *)shared_page);
   23.58 - *      FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
   23.59 + * Initializing the back follows similarly (note that only the front
   23.60 + * initializes the shared ring):
   23.61   *
   23.62 - *  Initializing the back follows similarly...
   23.63 + *     mytag_back_ring_t back_ring;
   23.64 + *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
   23.65   */
   23.66           
   23.67  #define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
   23.68 @@ -66,10 +69,8 @@ union __name##_sring_entry {            
   23.69                                                                          \
   23.70  /* Shared ring page */                                                  \
   23.71  struct __name##_sring {                                                 \
   23.72 -    RING_IDX req_prod;                                                  \
   23.73 -    RING_IDX rsp_prod;                                                  \
   23.74 -    RING_IDX rsp_event; /* notify client when rsp_prod == rsp_event */  \
   23.75 -    uint8_t  server_is_sleeping; /* notify server to kick off work  */  \
   23.76 +    RING_IDX req_prod, req_event;                                       \
   23.77 +    RING_IDX rsp_prod, rsp_event;                                       \
   23.78      union __name##_sring_entry ring[1]; /* variable-length */           \
   23.79  };                                                                      \
   23.80                                                                          \
   23.81 @@ -95,24 +96,24 @@ typedef struct __name##_front_ring __nam
   23.82  typedef struct __name##_back_ring __name##_back_ring_t
   23.83  
   23.84  /*
   23.85 - *   Macros for manipulating rings.  
   23.86 + * Macros for manipulating rings.  
   23.87   * 
   23.88 - *   FRONT_RING_whatever works on the "front end" of a ring: here 
   23.89 - *   requests are pushed on to the ring and responses taken off it.
   23.90 + * FRONT_RING_whatever works on the "front end" of a ring: here 
   23.91 + * requests are pushed on to the ring and responses taken off it.
   23.92   * 
   23.93 - *   BACK_RING_whatever works on the "back end" of a ring: here 
   23.94 - *   requests are taken off the ring and responses put on.
   23.95 + * BACK_RING_whatever works on the "back end" of a ring: here 
   23.96 + * requests are taken off the ring and responses put on.
   23.97   * 
   23.98 - *   N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.  
   23.99 - *   This is OK in 1-for-1 request-response situations where the 
  23.100 - *   requestor (front end) never has more than RING_SIZE()-1
  23.101 - *   outstanding requests.
  23.102 + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.  
  23.103 + * This is OK in 1-for-1 request-response situations where the 
  23.104 + * requestor (front end) never has more than RING_SIZE()-1
  23.105 + * outstanding requests.
  23.106   */
  23.107  
  23.108  /* Initialising empty rings */
  23.109  #define SHARED_RING_INIT(_s) do {                                       \
  23.110 -    (_s)->req_prod = 0;                                                 \
  23.111 -    (_s)->rsp_prod = 0;                                                 \
  23.112 +    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
  23.113 +    (_s)->req_event = (_s)->rsp_event = 1;                              \
  23.114  } while(0)
  23.115  
  23.116  #define FRONT_RING_INIT(_r, _s, __size) do {                            \
  23.117 @@ -148,10 +149,6 @@ typedef struct __name##_back_ring __name
  23.118  #define RING_SIZE(_r)                                                   \
  23.119      ((_r)->nr_ents)
  23.120  
  23.121 -/* How many empty slots are on a ring? */
  23.122 -#define RING_PENDING_REQUESTS(_r)                                       \
  23.123 -   ( ((_r)->req_prod_pvt - (_r)->rsp_cons) )
  23.124 -   
  23.125  /* Test if there is an empty slot available on the front ring. 
  23.126   * (This is only meaningful from the front. )
  23.127   */
  23.128 @@ -167,25 +164,6 @@ typedef struct __name##_back_ring __name
  23.129       (((_r)->req_cons - (_r)->rsp_prod_pvt) !=                          \
  23.130        RING_SIZE(_r)) )
  23.131        
  23.132 -/* Test if there are messages waiting to be pushed. */
  23.133 -#define RING_HAS_UNPUSHED_REQUESTS(_r)                                  \
  23.134 -   ( (_r)->req_prod_pvt != (_r)->sring->req_prod )
  23.135 -   
  23.136 -#define RING_HAS_UNPUSHED_RESPONSES(_r)                                 \
  23.137 -   ( (_r)->rsp_prod_pvt != (_r)->sring->rsp_prod )
  23.138 -
  23.139 -/* Copy the private producer pointer into the shared ring so the other end 
  23.140 - * can see the updates we've made. */
  23.141 -#define RING_PUSH_REQUESTS(_r) do {                                     \
  23.142 -    wmb();                                                              \
  23.143 -    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
  23.144 -} while (0)
  23.145 -
  23.146 -#define RING_PUSH_RESPONSES(_r) do {                                    \
  23.147 -    wmb();                                                              \
  23.148 -    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
  23.149 -} while (0)
  23.150 -
  23.151  /* Direct access to individual ring elements, by index. */
  23.152  #define RING_GET_REQUEST(_r, _idx)                                      \
  23.153   (&((_r)->sring->ring[                                                  \
  23.154 @@ -201,6 +179,82 @@ typedef struct __name##_back_ring __name
  23.155  #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
  23.156      (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
  23.157  
  23.158 +#define RING_PUSH_REQUESTS(_r) do {                                     \
  23.159 +    wmb(); /* back sees requests /before/ updated producer index */     \
  23.160 +    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
  23.161 +} while (0)
  23.162 +
  23.163 +#define RING_PUSH_RESPONSES(_r) do {                                    \
  23.164 +    wmb(); /* front sees responses /before/ updated producer index */   \
  23.165 +    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
  23.166 +} while (0)
  23.167 +
  23.168 +/*
  23.169 + * Notification hold-off (req_event and rsp_event):
  23.170 + * 
  23.171 + * When queueing requests or responses on a shared ring, it may not always be
  23.172 + * necessary to notify the remote end. For example, if requests are in flight
  23.173 + * in a backend, the front may be able to queue further requests without
  23.174 + * notifying the back (if the back checks for new requests when it queues
  23.175 + * responses).
  23.176 + * 
  23.177 + * When enqueuing requests or responses:
  23.178 + * 
  23.179 + *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
  23.180 + *  is a boolean return value. True indicates that the receiver requires an
  23.181 + *  asynchronous notification.
  23.182 + * 
  23.183 + * After dequeuing requests or responses (before sleeping the connection):
  23.184 + * 
  23.185 + *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
  23.186 + *  The second argument is a boolean return value. True indicates that there
  23.187 + *  are pending messages on the ring (i.e., the connection should not be put
  23.188 + *  to sleep).
  23.189 + *  
  23.190 + *  These macros will set the req_event/rsp_event field to trigger a
  23.191 + *  notification on the very next message that is enqueued. If you want to
  23.192 + *  create batches of work (i.e., only receive a notification after several
  23.193 + *  messages have been enqueued) then you will need to create a customised
  23.194 + *  version of the FINAL_CHECK macro in your own code, which sets the event
  23.195 + *  field appropriately.
  23.196 + */
  23.197 +
  23.198 +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
  23.199 +    RING_IDX __old = (_r)->sring->req_prod;                             \
  23.200 +    RING_IDX __new = (_r)->req_prod_pvt;                                \
  23.201 +    wmb(); /* back sees requests /before/ updated producer index */     \
  23.202 +    (_r)->sring->req_prod = __new;                                      \
  23.203 +    mb(); /* back sees new requests /before/ we check req_event */      \
  23.204 +    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
  23.205 +                 (RING_IDX)(__new - __old));                            \
  23.206 +} while (0)
  23.207 +
  23.208 +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
  23.209 +    RING_IDX __old = (_r)->sring->rsp_prod;                             \
  23.210 +    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
  23.211 +    wmb(); /* front sees responses /before/ updated producer index */   \
  23.212 +    (_r)->sring->rsp_prod = __new;                                      \
  23.213 +    mb(); /* front sees new responses /before/ we check rsp_event */    \
  23.214 +    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
  23.215 +                 (RING_IDX)(__new - __old));                            \
  23.216 +} while (0)
  23.217 +
  23.218 +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
  23.219 +    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
  23.220 +    if (_work_to_do) break;                                             \
  23.221 +    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
  23.222 +    mb();                                                               \
  23.223 +    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
  23.224 +} while (0)
  23.225 +
  23.226 +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
  23.227 +    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
  23.228 +    if (_work_to_do) break;                                             \
  23.229 +    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
  23.230 +    mb();                                                               \
  23.231 +    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
  23.232 +} while (0)
  23.233 +
  23.234  #endif /* __XEN_PUBLIC_IO_RING_H__ */
  23.235  
  23.236  /*
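
The heart of the hold-off scheme is the comparison in the two CHECK_NOTIFY macros: notify exactly when the peer's event index was crossed by this producer update, i.e. lies in the half-open interval (old, new], computed with unsigned subtraction so it stays correct when the index wraps. A small standalone demonstration, using uint32_t in place of RING_IDX (an assumption for illustration only):

    #include <stdint.h>
    #include <stdio.h>

    /* Notify iff 'event' lies in (old, new]; unsigned maths survives wrap. */
    static int needs_notify(uint32_t prod_new, uint32_t prod_old, uint32_t event)
    {
        return (uint32_t)(prod_new - event) < (uint32_t)(prod_new - prod_old);
    }

    int main(void)
    {
        /* Producer moved 4 -> 7, crossing the peer's threshold of 5: notify. */
        printf("%d\n", needs_notify(7, 4, 5));                       /* prints 1 */

        /* Threshold 9 was not crossed by the 4 -> 7 move: hold off. */
        printf("%d\n", needs_notify(7, 4, 9));                       /* prints 0 */

        /* The same decision survives index wrap-around. */
        printf("%d\n", needs_notify(2, 0xfffffffeU, 0xffffffffU));   /* prints 1 */

        return 0;
    }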