ia64/xen-unstable

changeset 218:4e2e24ecaab0

bitkeeper revision 1.77 (3e54ed8eXTuV-1E8SKTADy-ALYgPLw)

xl_block.c, Makefile, xen_block.c, perfc.c:
First cut of I/O scheduling in Xen, using a Linux-style tasklet.
author kaf24@labyrinth.cl.cam.ac.uk
date Thu Feb 20 15:00:30 2003 +0000 (2003-02-20)
parents 48f852d4241d
children 7f19fcd72411
files xen-2.4.16/common/perfc.c xen-2.4.16/drivers/block/xen_block.c xenolinux-2.4.16-sparse/Makefile xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
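
The commit message above refers to a Linux-style tasklet, and the xen_block.c
diff below leans on that mechanism, so a minimal sketch of the 2.4-era tasklet
API may help. The names my_tasklet, my_handler and kick are illustrative only;
the real change uses io_schedule_tasklet and io_schedule:

    #include <linux/interrupt.h>  /* Xen's port includes <xeno/interrupt.h> */

    static void my_handler(unsigned long data);

    /* Statically declare a tasklet bound to my_handler; the trailing 0 is
     * the 'data' argument the handler receives when it runs. */
    static DECLARE_TASKLET(my_tasklet, my_handler, 0);

    static void my_handler(unsigned long unused)
    {
        /* Runs later, in softirq context: drain deferred work here. */
    }

    void kick(void)
    {
        /* Safe to call from interrupt context; repeated calls before the
         * handler actually runs coalesce into a single execution. */
        tasklet_schedule(&my_tasklet);
    }
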
     1.1 --- a/xen-2.4.16/common/perfc.c	Thu Feb 20 12:24:36 2003 +0000
     1.2 +++ b/xen-2.4.16/common/perfc.c	Thu Feb 20 15:00:30 2003 +0000
     1.3 @@ -19,7 +19,7 @@ void __perfc_print (unsigned long counte
     1.4    int loop;
     1.5    int total_size = 0;
     1.6    int element_size = 0;
     1.7 -  int num;
     1.8 +  int num = 0;
     1.9  
    1.10    for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
    1.11    {
     2.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 12:24:36 2003 +0000
     2.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 15:00:30 2003 +0000
     2.3 @@ -15,78 +15,118 @@
     2.4  #include <asm-i386/io.h>
     2.5  #include <asm/spinlock.h>
     2.6  #include <xeno/keyhandler.h>
     2.7 +#include <xeno/interrupt.h>
     2.8  
     2.9  #define XEN_BLK_DEBUG 0
    2.10  #define XEN_BLK_DEBUG_LEVEL KERN_ALERT
    2.11  
    2.12 -typedef struct blk_request {
    2.13 +typedef struct blk_request
    2.14 +{
    2.15      struct buffer_head *bh;
    2.16      void               *id;
    2.17      struct task_struct *domain;
    2.18  } blk_request_t;
    2.19 +
    2.20  #define MAX_PENDING_REQS 32
    2.21  #define BATCH_PER_DOMAIN 8
    2.22 +
    2.23  static kmem_cache_t *blk_request_cachep;
    2.24  static atomic_t nr_pending;
    2.25  
    2.26 +static void io_schedule(unsigned long unused);
    2.27  static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
    2.28 -static int dispatch_rw_block_io(struct task_struct *p, int index);
    2.29 -static int dispatch_probe_block_io(struct task_struct *p, int index);
    2.30 -static int dispatch_debug_block_io(struct task_struct *p, int index);
    2.31 +static void dispatch_rw_block_io(struct task_struct *p, int index);
    2.32 +static void dispatch_probe_block_io(struct task_struct *p, int index);
    2.33 +static void dispatch_debug_block_io(struct task_struct *p, int index);
    2.34 +
    2.35  
    2.36 -static spinlock_t io_schedule_lock;
    2.37 +/******************************************************************
    2.38 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
    2.39 + */
    2.40 +
    2.41  static struct list_head io_schedule_list;
    2.42 +static spinlock_t io_schedule_list_lock;
    2.43  
    2.44 -static int on_blkdev_list(struct task_struct *p)
    2.45 +static int __on_blkdev_list(struct task_struct *p)
    2.46  {
    2.47      return p->blkdev_list.next != NULL;
    2.48  }
    2.49  
    2.50  static void remove_from_blkdev_list(struct task_struct *p)
    2.51  {
    2.52 -    list_del(&p->blkdev_list);
    2.53 -    p->blkdev_list.next = NULL;
    2.54 +    unsigned long flags;
    2.55 +    if ( !__on_blkdev_list(p) ) return;
    2.56 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
    2.57 +    if ( __on_blkdev_list(p) )
    2.58 +    {
    2.59 +        list_del(&p->blkdev_list);
    2.60 +        p->blkdev_list.next = NULL;
    2.61 +    }
    2.62 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    2.63  }
    2.64  
    2.65  static void add_to_blkdev_list(struct task_struct *p)
    2.66  {
    2.67 -    list_add(&p->blkdev_list, &io_schedule_list);
    2.68 +    unsigned long flags;
    2.69 +    if ( __on_blkdev_list(p) ) return;
    2.70 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
    2.71 +    if ( !__on_blkdev_list(p) )
    2.72 +    {
    2.73 +        list_add(&p->blkdev_list, &io_schedule_list);
    2.74 +    }
    2.75 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    2.76  }
    2.77  
    2.78  static void add_to_blkdev_list_tail(struct task_struct *p)
    2.79  {
    2.80 -    list_add_tail(&p->blkdev_list, &io_schedule_list);
    2.81 +    unsigned long flags;
    2.82 +    if ( __on_blkdev_list(p) ) return;
    2.83 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
    2.84 +    if ( !__on_blkdev_list(p) )
    2.85 +    {
    2.86 +        list_add_tail(&p->blkdev_list, &io_schedule_list);
    2.87 +    }
    2.88 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    2.89  }
    2.90  
    2.91 -static void io_schedule(void)
    2.92 +
    2.93 +/******************************************************************
    2.94 + * SCHEDULER FUNCTIONS
    2.95 + */
    2.96 +
    2.97 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
    2.98 +
    2.99 +static void io_schedule(unsigned long unused)
   2.100  {
   2.101      struct task_struct *p;
   2.102      struct list_head *ent;
   2.103  
   2.104 -    while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) &&
   2.105 -            !list_empty(&io_schedule_list) &&
   2.106 -            spin_trylock(&io_schedule_lock) )
   2.107 +    while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
   2.108 +            !list_empty(&io_schedule_list) )
   2.109      {
   2.110 -        while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
   2.111 -                !list_empty(&io_schedule_list) )
   2.112 -        {
   2.113 -            ent = io_schedule_list.next;
   2.114 -            p = list_entry(ent, struct task_struct, blkdev_list);
   2.115 -            remove_from_blkdev_list(p);
   2.116 -            if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
   2.117 -                add_to_blkdev_list_tail(p);
   2.118 -        }
   2.119 -        spin_unlock(&io_schedule_lock);
   2.120 +        ent = io_schedule_list.next;
   2.121 +        p = list_entry(ent, struct task_struct, blkdev_list);
   2.122 +        remove_from_blkdev_list(p);
   2.123 +        if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
   2.124 +            add_to_blkdev_list_tail(p);
   2.125 +    }
   2.126 +}
   2.127 +
   2.128 +static void maybe_trigger_io_schedule(void)
   2.129 +{
   2.130 +    if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
   2.131 +         !list_empty(&io_schedule_list) )
   2.132 +    {
   2.133 +        tasklet_schedule(&io_schedule_tasklet);
   2.134      }
   2.135  }
   2.136  
   2.137  
   2.138 -/*
   2.139 - * end_block_io_op:
   2.140 - *  IO has completed.  Need to notify the guest operating system.
   2.141 - *  Called from ll_rw_block -- currently /DIRECTLY/ -- XXX FIXME 
   2.142 - *  (e.g. hook into proper end processing of ll_rw) 
   2.143 +
   2.144 +/******************************************************************
   2.145 + * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io
   2.146   */
   2.147 +
   2.148  void end_block_io_op(struct buffer_head * bh)
   2.149  {
   2.150      unsigned long cpu_mask;
   2.151 @@ -96,12 +136,13 @@ void end_block_io_op(struct buffer_head 
   2.152      int position = 0;
   2.153      blk_ring_t *blk_ring;
   2.154  
   2.155 -    if (XEN_BLK_DEBUG)  
   2.156 +    if ( XEN_BLK_DEBUG )  
   2.157  	printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op,  bh: %lx\n",
   2.158  	       (unsigned long)bh);
   2.159      
   2.160      if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL) 
   2.161          goto bad_interrupt;
   2.162 +
   2.163      atomic_dec(&nr_pending);
   2.164      
   2.165      p = blk_request->domain;
   2.166 @@ -124,8 +165,7 @@ void end_block_io_op(struct buffer_head 
   2.167          kfree(blk_request->bh);     
   2.168      kmem_cache_free(blk_request_cachep, blk_request);
   2.169  
   2.170 -    /* Get more work to do. */
   2.171 -    io_schedule();
   2.172 +    maybe_trigger_io_schedule();
   2.173  
   2.174      return;
   2.175  
   2.176 @@ -138,30 +178,28 @@ void end_block_io_op(struct buffer_head 
   2.177  }
   2.178  
   2.179  
   2.180 -/*
   2.181 - * do_block_io_op:
   2.182 - *  Accept a block io request from a guest operating system.
   2.183 - *  There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
   2.184 +
   2.185 +/******************************************************************
   2.186 + * GUEST-OS SYSCALL -- Indicates there are requests outstanding.
   2.187   */
   2.188 +
   2.189  long do_block_io_op(void)
   2.190  {
   2.191 -    if ( !on_blkdev_list(current) )
   2.192 -    {
   2.193 -        spin_lock_irq(&io_schedule_lock);
   2.194 -        add_to_blkdev_list_tail(current);
   2.195 -        spin_unlock_irq(&io_schedule_lock);
   2.196 -    }
   2.197 -
   2.198 -    io_schedule();
   2.199 -
   2.200 +    add_to_blkdev_list_tail(current);
   2.201 +    maybe_trigger_io_schedule();
   2.202      return 0L;
   2.203  }
   2.204  
   2.205  
   2.206 +
   2.207 +/******************************************************************
   2.208 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
   2.209 + */
   2.210 +
   2.211  static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
   2.212  {
   2.213      blk_ring_t *blk_ring = task->blk_ring_base;
   2.214 -    int loop, status = 0;
   2.215 +    int loop, more_to_do = 0;
   2.216  
   2.217      if (XEN_BLK_DEBUG)  
   2.218  	printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
   2.219 @@ -171,23 +209,25 @@ static int do_block_io_op_domain(struct 
   2.220  	  loop != blk_ring->req_prod; 
   2.221  	  loop = BLK_REQ_RING_INC(loop) ) 
   2.222      {
   2.223 -	status = 1;
   2.224 -
   2.225 -        if ( max_to_do-- == 0 ) break;
   2.226 +        if ( max_to_do-- == 0 )
   2.227 +        {
   2.228 +            more_to_do = 1;
   2.229 +            break;
   2.230 +        }
   2.231          
   2.232 -	switch (blk_ring->req_ring[loop].operation) {
   2.233 -
   2.234 +	switch (blk_ring->req_ring[loop].operation)
   2.235 +        {
   2.236  	case XEN_BLOCK_READ:
   2.237  	case XEN_BLOCK_WRITE:
   2.238 -	    status = dispatch_rw_block_io(task, loop);
   2.239 +	    dispatch_rw_block_io(task, loop);
   2.240  	    break;
   2.241  
   2.242  	case XEN_BLOCK_PROBE:
   2.243 -	    status = dispatch_probe_block_io(task, loop);
   2.244 +	    dispatch_probe_block_io(task, loop);
   2.245  	    break;
   2.246  
   2.247  	case XEN_BLOCK_DEBUG:
   2.248 -	    status = dispatch_debug_block_io(task, loop);
   2.249 +	    dispatch_debug_block_io(task, loop);
   2.250  	    break;
   2.251  
   2.252  	default:
   2.253 @@ -195,23 +235,18 @@ static int do_block_io_op_domain(struct 
   2.254  		    blk_ring->req_ring[loop].operation);
   2.255  	    BUG();
   2.256  	}
   2.257 -
   2.258 -	if ( status ) break;
   2.259      }
   2.260  
   2.261      blk_ring->req_cons = loop;
   2.262 -    return status;
   2.263 +    return more_to_do;
   2.264  }
   2.265  
   2.266 -
   2.267 -static int dispatch_debug_block_io(struct task_struct *p, int index)
   2.268 +static void dispatch_debug_block_io(struct task_struct *p, int index)
   2.269  {
   2.270      printk (KERN_ALERT "dispatch_debug_block_io: UNIMPL\n"); 
   2.271 -    return 1; 
   2.272  }
   2.273  
   2.274 -
   2.275 -static int dispatch_probe_block_io(struct task_struct *p, int index)
   2.276 +static void dispatch_probe_block_io(struct task_struct *p, int index)
   2.277  {
   2.278      extern void ide_probe_devices(xen_disk_info_t *xdi);
   2.279      blk_ring_t *blk_ring = p->blk_ring_base;
   2.280 @@ -224,12 +259,9 @@ static int dispatch_probe_block_io(struc
   2.281      blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
   2.282      blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
   2.283      blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
   2.284 -    
   2.285 -    return 0;
   2.286  }
   2.287  
   2.288 -
   2.289 -static int dispatch_rw_block_io(struct task_struct *p, int index)
   2.290 +static void dispatch_rw_block_io(struct task_struct *p, int index)
   2.291  {
   2.292      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   2.293      blk_ring_t *blk_ring = p->blk_ring_base;
   2.294 @@ -242,18 +274,20 @@ static int dispatch_rw_block_io(struct t
   2.295       * check to make sure that the block request seems at least
   2.296       * a bit legitimate
   2.297       */
   2.298 -    if ((blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0) {
   2.299 +    if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
   2.300 +    {
   2.301  	printk(KERN_ALERT "    error: dodgy block size: %d\n", 
   2.302  	       blk_ring->req_ring[index].block_size);
   2.303  	BUG();
   2.304      }
   2.305      
   2.306 -    if(blk_ring->req_ring[index].buffer == NULL) { 
   2.307 +    if ( blk_ring->req_ring[index].buffer == NULL )
   2.308 +    { 
   2.309  	printk(KERN_ALERT "xen_block: bogus buffer from guestOS\n"); 
   2.310  	BUG();
   2.311      }
   2.312  
   2.313 -    if (XEN_BLK_DEBUG) {
   2.314 +    if (XEN_BLK_DEBUG)
   2.315  	printk(XEN_BLK_DEBUG_LEVEL "    req_cons: %d  req_prod %d  index: %d "
   2.316  	       "op: %s, pri: %s\n", blk_ring->req_cons, blk_ring->req_prod, 
   2.317  	       index, 
   2.318 @@ -261,7 +295,6 @@ static int dispatch_rw_block_io(struct t
   2.319  		"read" : "write"), 
   2.320  	       (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? 
   2.321  		"sync" : "async"));
   2.322 -    }
   2.323  
   2.324      atomic_inc(&nr_pending);
   2.325      blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
   2.326 @@ -269,7 +302,8 @@ static int dispatch_rw_block_io(struct t
   2.327      /* we'll be doing this frequently, would a cache be appropriate? */
   2.328      bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head), 
   2.329  					GFP_KERNEL);
   2.330 -    if (!bh) {
   2.331 +    if ( bh == NULL )
   2.332 +    {
   2.333  	printk(KERN_ALERT "ERROR: bh is null\n");
   2.334  	BUG();
   2.335      }
   2.336 @@ -286,11 +320,14 @@ static int dispatch_rw_block_io(struct t
   2.337      bh->b_count.counter = 1;
   2.338      bh->b_xen_request   = (void *)blk_request;  
   2.339      
   2.340 -    if (blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE) {
   2.341 +    if (blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE)
   2.342 +    {
   2.343  	bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
   2.344  		       (1 << BH_Dirty) | (1 << BH_Uptodate));
   2.345  	operation = WRITE;
   2.346 -    } else {
   2.347 +    } 
   2.348 +    else
   2.349 +    {
   2.350  	bh->b_state = (1 << BH_Mapped);
   2.351  	operation = READ;
   2.352      }
   2.353 @@ -304,18 +341,20 @@ static int dispatch_rw_block_io(struct t
   2.354      ll_rw_block(operation, 1, &bh);       /* linux top half */
   2.355      rq = blk_get_queue(bh->b_rdev);                         
   2.356      generic_unplug_device(rq);            /* linux bottom half */
   2.357 -
   2.358 -    return 0;
   2.359  }
   2.360  
   2.361  
   2.362 +
   2.363 +/******************************************************************
   2.364 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
   2.365 + */
   2.366 +
   2.367  static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) 
   2.368  {
   2.369      printk("Dumping block queue stats: nr_pending = %d\n",
   2.370             atomic_read(&nr_pending));
   2.371  }
   2.372  
   2.373 -
   2.374  /* Start-of-day initialisation for a new domain. */
   2.375  void init_blkdev_info(struct task_struct *p)
   2.376  {
   2.377 @@ -326,27 +365,19 @@ void init_blkdev_info(struct task_struct
   2.378      p->blkdev_list.next = NULL;
   2.379  }
   2.380  
   2.381 -
   2.382  /* End-of-day teardown for a domain. XXX Outstanding requests? */
   2.383  void destroy_blkdev_info(struct task_struct *p)
   2.384  {
   2.385 -    unsigned long flags;
   2.386 -    if ( on_blkdev_list(p) )
   2.387 -    {
   2.388 -        spin_lock_irqsave(&io_schedule_lock, flags);
   2.389 -        if ( on_blkdev_list(p) ) remove_from_blkdev_list(p);
   2.390 -        spin_unlock_irqrestore(&io_schedule_lock, flags);
   2.391 -    }
   2.392 +    remove_from_blkdev_list(p);
   2.393      UNSHARE_PFN(virt_to_page(p->blk_ring_base));
   2.394      free_page((unsigned long)p->blk_ring_base);
   2.395  }
   2.396  
   2.397 -
   2.398  void initialize_block_io ()
   2.399  {
   2.400      atomic_set(&nr_pending, 0);
   2.401  
   2.402 -    spin_lock_init(&io_schedule_lock);
   2.403 +    spin_lock_init(&io_schedule_list_lock);
   2.404      INIT_LIST_HEAD(&io_schedule_list);
   2.405  
   2.406      blk_request_cachep = kmem_cache_create(
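
Two patterns in the xen_block.c hunks above deserve a note. First, each list
helper pairs an unlocked fast-path test of p->blkdev_list.next with a second
test under io_schedule_list_lock: another CPU can race in between, so only
the locked re-check is trusted (a NULL next pointer doubles as the
"not on the list" marker, which init_blkdev_info establishes). A condensed
restatement of add_to_blkdev_list_tail, under a hypothetical name:

    static void add_if_absent(struct task_struct *p)
    {
        unsigned long flags;
        if ( p->blkdev_list.next != NULL )
            return;                               /* cheap unlocked test */
        spin_lock_irqsave(&io_schedule_list_lock, flags);
        if ( p->blkdev_list.next == NULL )        /* re-check under lock */
            list_add_tail(&p->blkdev_list, &io_schedule_list);
        spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    }

Second, the trigger is hysteretic: maybe_trigger_io_schedule() schedules the
tasklet only once nr_pending has drained below MAX_PENDING_REQS/2, while
io_schedule() refills the window all the way to MAX_PENDING_REQS, so a burst
of end_block_io_op() completions costs one tasklet run rather than one per
completion. Domains are serviced round-robin, BATCH_PER_DOMAIN requests at a
time, and re-queued at the tail while they still have work outstanding.
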
     3.1 --- a/xenolinux-2.4.16-sparse/Makefile	Thu Feb 20 12:24:36 2003 +0000
     3.2 +++ b/xenolinux-2.4.16-sparse/Makefile	Thu Feb 20 15:00:30 2003 +0000
     3.3 @@ -25,7 +25,7 @@ FINDHPATH	= $(HPATH)/asm $(HPATH)/linux 
     3.4  HOSTCC  	= gcc
     3.5  HOSTCFLAGS	= -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
     3.6  
     3.7 -CROSS_COMPILE 	=
     3.8 +CROSS_COMPILE 	= 
     3.9  
    3.10  #
    3.11  # Include the make variables (CC, etc...)
     4.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 12:24:36 2003 +0000
     4.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 15:00:30 2003 +0000
     4.3 @@ -30,7 +30,7 @@ typedef unsigned char	byte;
     4.4  
     4.5  void xlblk_ide_register_disk(int, unsigned long);
     4.6  
     4.7 -#define XLBLK_MAX 2                                /* very arbitrary */
     4.8 +#define XLBLK_MAX 32 /* Maximum minor devices we support */
     4.9  #define XLBLK_MAJOR_NAME "xhd"
    4.10  #define IDE_PARTN_BITS 6                           /* from ide.h::PARTN_BITS */
    4.11  #define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1)     /* from ide.h::PARTN_MASK */
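
On the xl_block.c change: XLBLK_MAX bounds how many minor device numbers the
driver handles, and the IDE_PARTN_BITS / IDE_PARTN_MASK macros shown above
split a minor number into a unit and a partition in the usual ide.h fashion.
An illustrative decode (the helper names are hypothetical):

    /* The low IDE_PARTN_BITS bits select the partition within a unit;
     * the remaining high bits select the unit itself. */
    static inline int xhd_unit(int minor)      { return minor >> IDE_PARTN_BITS; }
    static inline int xhd_partition(int minor) { return minor & IDE_PARTN_MASK; }

    /* Example: minor 65 == (1 << 6) | 1  ->  unit 1, partition 1. */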