ia64/xen-unstable

changeset 214:2e78c0d979ec

bitkeeper revision 1.74 (3e54b63dw5kX3U_MQzXBiMsEyQdHJQ)

sched.h, blkdev.h, xen_block.c, domain.c:
Beginnings of cheesy IO scheduling.
author kaf24@labyrinth.cl.cam.ac.uk
date Thu Feb 20 11:04:29 2003 +0000 (2003-02-20)
parents 3f4a788e4e0d
children 43f9b7e980ef
files xen-2.4.16/common/domain.c xen-2.4.16/drivers/block/xen_block.c xen-2.4.16/include/xeno/blkdev.h xen-2.4.16/include/xeno/sched.h
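
In outline: the old scheme kicked off every queued request for a domain as soon as it trapped in, and used a global 'pending_work' bitmask to retry domains that could not be serviced. This changeset replaces that with a round-robin run list (io_schedule_list): a domain is queued when it issues the block-I/O hypercall, io_schedule() services at most BATCH_PER_DOMAIN requests per domain per pass, only bothers to take the lock once the pipeline has drained below half of MAX_PENDING_REQS (now 32, down from 256), and then fills it back up to the cap. The guest-side producer this implies looks roughly as follows (a sketch only: blk_ring_t, req_ring, 'operation' and 'id' appear in this diff; req_prod and BLK_REQ_RING_INC are assumed by analogy with req_cons and BLK_RESP_RING_INC; HYPERVISOR_block_io_op() is a hypothetical hypercall wrapper; real entries carry more fields, e.g. device and sector, omitted here):

    /* Hypothetical guest-side submit path. Assumes the shared-ring
     * layout implied by this changeset: the guest advances req_prod,
     * Xen advances req_cons, and ids are echoed back in resp_ring. */
    static void guest_submit(blk_ring_t *ring, int op, void *id)
    {
        blk_ring_req_entry_t *req = &ring->req_ring[ring->req_prod];
        req->operation = op;       /* e.g. XEN_BLOCK_READ / XEN_BLOCK_WRITE */
        req->id        = id;       /* token Xen echoes in resp_ring[].id    */
        ring->req_prod = BLK_REQ_RING_INC(ring->req_prod);
        HYPERVISOR_block_io_op();  /* hypothetical wrapper for the trap
                                      that lands in do_block_io_op()        */
    }
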
line diff
     1.1 --- a/xen-2.4.16/common/domain.c	Thu Feb 20 09:51:00 2003 +0000
     1.2 +++ b/xen-2.4.16/common/domain.c	Thu Feb 20 11:04:29 2003 +0000
     1.3 @@ -15,6 +15,7 @@
     1.4  #include <asm/flushtlb.h>
     1.5  #include <asm/msr.h>
     1.6  #include <xeno/multiboot.h>
     1.7 +#include <xeno/blkdev.h>
     1.8  
     1.9  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
    1.10  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
    1.11 @@ -48,10 +49,7 @@ struct task_struct *do_newdomain(unsigne
    1.12      memset(p->shared_info, 0, PAGE_SIZE);
    1.13      SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
    1.14  
    1.15 -    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
    1.16 -    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
    1.17 -    memset(p->blk_ring_base, 0, PAGE_SIZE);
    1.18 -    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), dom_id);
    1.19 +    init_blkdev_info(p);
    1.20  
    1.21      SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
    1.22      SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
    1.23 @@ -216,8 +214,7 @@ void release_task(struct task_struct *p)
    1.24      }
    1.25      if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
    1.26  
    1.27 -    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
    1.28 -    free_page((unsigned long)p->blk_ring_base);
    1.29 +    destroy_blkdev_info(p);
    1.30  
    1.31      UNSHARE_PFN(virt_to_page(p->shared_info));
    1.32      free_page((unsigned long)p->shared_info);
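
The domain.c hunks reduce to a lifecycle contract: every init_blkdev_info(p) performed in do_newdomain() must be balanced by a destroy_blkdev_info(p) in release_task(), with the ring-page sharing and run-list state now owned by the blkdev layer. Condensed view of the resulting call sites (a sketch; surrounding code elided):

    /* in do_newdomain(): */
    init_blkdev_info(p);     /* allocate + zero the blk_ring page, share
                              * it with the domain, and mark p as off the
                              * block-I/O run list (blkdev_list.next == NULL) */

    /* in release_task(): */
    destroy_blkdev_info(p);  /* dequeue p from the run list if present,
                              * then unshare and free the ring page          */
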
     2.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 09:51:00 2003 +0000
     2.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 11:04:29 2003 +0000
     2.3 @@ -19,30 +19,67 @@
     2.4  #define XEN_BLK_DEBUG 0
     2.5  #define XEN_BLK_DEBUG_LEVEL KERN_ALERT
     2.6  
     2.7 -/*
     2.8 - * KAF XXX: the current state of play with blk_requests.
     2.9 - * 
    2.10 - * The following infrastructure is really here for future use.
    2.11 - * blk_requests are currently not used by any mechanism, but eventually
    2.12 - * pending blk_requests will go into an IO scheduler. This entry point
    2.13 - * will go where we currently increment 'nr_pending'. The scheduler will
    2.14 - * refuse admission of a blk_request if it is already full.
    2.15 - */
    2.16  typedef struct blk_request {
    2.17 -  struct list_head queue;
    2.18 -  struct buffer_head *bh;
    2.19 -  blk_ring_req_entry_t *request;
    2.20 -  struct task_struct *domain;                /* requesting domain */
    2.21 +    struct buffer_head *bh;
    2.22 +    void               *id;
    2.23 +    struct task_struct *domain;
    2.24  } blk_request_t;
    2.25 -#define MAX_PENDING_REQS 256                 /* very arbitrary */
    2.26 +#define MAX_PENDING_REQS 32
    2.27 +#define BATCH_PER_DOMAIN 8
    2.28  static kmem_cache_t *blk_request_cachep;
    2.29  static atomic_t nr_pending;
    2.30 -static int pending_work; /* Bitmask: which domains have work for us? */
    2.31 +
    2.32 +static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
    2.33 +static int dispatch_rw_block_io(int index);
    2.34 +static int dispatch_probe_block_io(int index);
    2.35 +static int dispatch_debug_block_io(int index);
    2.36 +
    2.37 +static spinlock_t io_schedule_lock;
    2.38 +static struct list_head io_schedule_list;
    2.39 +
    2.40 +static int on_blkdev_list(struct task_struct *p)
    2.41 +{
    2.42 +    return p->blkdev_list.next != NULL;
    2.43 +}
    2.44 +
    2.45 +static void remove_from_blkdev_list(struct task_struct *p)
    2.46 +{
    2.47 +    list_del(&p->blkdev_list);
    2.48 +    p->blkdev_list.next = NULL;
    2.49 +}
    2.50 +
    2.51 +static void add_to_blkdev_list(struct task_struct *p)
    2.52 +{
    2.53 +    list_add(&p->blkdev_list, &io_schedule_list);
    2.54 +}
    2.55  
    2.56 -static long do_block_io_op_domain (struct task_struct* task);
    2.57 -static int dispatch_rw_block_io (int index);
    2.58 -static int dispatch_probe_block_io (int index);
    2.59 -static int dispatch_debug_block_io (int index);
    2.60 +static void add_to_blkdev_list_tail(struct task_struct *p)
    2.61 +{
    2.62 +    list_add_tail(&p->blkdev_list, &io_schedule_list);
    2.63 +}
    2.64 +
    2.65 +static void io_schedule(void)
    2.66 +{
    2.67 +    struct task_struct *p;
    2.68 +    struct list_head *ent;
    2.69 +
    2.70 +    while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) &&
    2.71 +            !list_empty(&io_schedule_list) &&
    2.72 +            spin_trylock(&io_schedule_lock) )
    2.73 +    {
    2.74 +        while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
    2.75 +                !list_empty(&io_schedule_list) )
    2.76 +        {
    2.77 +            ent = io_schedule_list.next;
    2.78 +            p = list_entry(ent, struct task_struct, blkdev_list);
    2.79 +            remove_from_blkdev_list(p);
    2.80 +            if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
    2.81 +                add_to_blkdev_list_tail(p);
    2.82 +        }
    2.83 +        spin_unlock(&io_schedule_lock);
    2.84 +    }
    2.85 +}
    2.86 +
    2.87  
    2.88  /*
    2.89   * end_block_io_op:
    2.90 @@ -58,7 +95,6 @@ void end_block_io_op(struct buffer_head 
    2.91      struct task_struct *p;
    2.92      int position = 0;
    2.93      blk_ring_t *blk_ring;
    2.94 -    int loop;
    2.95  
    2.96      if (XEN_BLK_DEBUG)  
    2.97  	printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op,  bh: %lx\n",
    2.98 @@ -74,7 +110,7 @@ void end_block_io_op(struct buffer_head 
    2.99      spin_lock_irqsave(&p->blk_ring_lock, flags);
   2.100      blk_ring = p->blk_ring_base;
   2.101      position = blk_ring->resp_prod;
   2.102 -    blk_ring->resp_ring[position].id     = blk_request->request->id;
   2.103 +    blk_ring->resp_ring[position].id     = blk_request->id;
   2.104      blk_ring->resp_ring[position].status = 0;
   2.105      blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
   2.106      spin_unlock_irqrestore(&p->blk_ring_lock, flags);
   2.107 @@ -87,27 +123,9 @@ void end_block_io_op(struct buffer_head 
   2.108      if ( blk_request->bh ) 
   2.109          kfree(blk_request->bh);     
   2.110      kmem_cache_free(blk_request_cachep, blk_request);
   2.111 -    
   2.112 -    /* XXX SMH: below is ugly and dangerous -- fix */
   2.113 -    /*
   2.114 -     * now check if there is any pending work from any domain
   2.115 -     * that we were previously unable to process.
   2.116 -     */
   2.117 -    for ( loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++ )
   2.118 -    {
   2.119 -	int domain = pending_work & (1 << loop);
   2.120  
   2.121 -	if ( domain ) 
   2.122 -        {
   2.123 -	    struct task_struct *mytask = current;
   2.124 -
   2.125 -	    while ( mytask->domain != loop )
   2.126 -		mytask = mytask->next_task;
   2.127 -
   2.128 -	    pending_work = pending_work & !(1 << loop);
   2.129 -	    do_block_io_op_domain(mytask);
   2.130 -	}
   2.131 -    }
   2.132 +    /* Get more work to do. */
   2.133 +    io_schedule();
   2.134  
   2.135      return;
   2.136  
   2.137 @@ -125,21 +143,25 @@ void end_block_io_op(struct buffer_head 
   2.138   *  Accept a block io request from a guest operating system.
   2.139   *  There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
   2.140   */
   2.141 +long do_block_io_op(void)
   2.142 +{
   2.143 +    if ( !on_blkdev_list(current) )
   2.144 +    {
   2.145 +        spin_lock_irq(&io_schedule_lock);
   2.146 +        add_to_blkdev_list_tail(current);
   2.147 +        spin_unlock_irq(&io_schedule_lock);
   2.148 +    }
   2.149  
   2.150 -long do_block_io_op (void)
   2.151 -{
   2.152 -    return do_block_io_op_domain(current);
   2.153 +    io_schedule();
   2.154 +
   2.155 +    return 0L;
   2.156  }
   2.157  
   2.158  
   2.159 -/*
   2.160 - * do_block_io_op_domain:
   2.161 - *  Handle the requests for a particular domain
   2.162 - */
   2.163 -static long do_block_io_op_domain (struct task_struct* task)
   2.164 +static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
   2.165  {
   2.166      blk_ring_t *blk_ring = task->blk_ring_base;
   2.167 -    int loop, status;
   2.168 +    int loop, status = 0;
   2.169  
   2.170      if (XEN_BLK_DEBUG)  
   2.171  	printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
   2.172 @@ -151,6 +173,8 @@ static long do_block_io_op_domain (struc
   2.173      {
   2.174  	status = 1;
   2.175  
   2.176 +        if ( max_to_do-- == 0 ) break;
   2.177 +        
   2.178  	switch (blk_ring->req_ring[loop].operation) {
   2.179  
   2.180  	case XEN_BLOCK_READ:
   2.181 @@ -172,20 +196,11 @@ static long do_block_io_op_domain (struc
   2.182  	    BUG();
   2.183  	}
   2.184  
   2.185 -
   2.186 -	if (status) {
   2.187 -	    /* 
   2.188 -	    ** Unable to successfully issue / complete command, maybe because
   2.189 -	    ** another resource (e.g. disk request buffers) is unavailable.
   2.190 -	    ** stop removing items from the communications ring and try later 
   2.191 -	    */
   2.192 -	    pending_work = pending_work | (1 << task->domain);
   2.193 -	    break;
   2.194 -	}
   2.195 +	if ( status ) break;
   2.196      }
   2.197  
   2.198      blk_ring->req_cons = loop;
   2.199 -    return 0L;
   2.200 +    return status;
   2.201  }
   2.202  
   2.203  
   2.204 @@ -284,7 +299,7 @@ static int dispatch_rw_block_io (int ind
   2.205      }
   2.206  
   2.207      /* save meta data about request */
   2.208 -    blk_request->request = &blk_ring->req_ring[index];
   2.209 +    blk_request->id     = blk_ring->req_ring[index].id;
   2.210      blk_request->bh     = bh;
   2.211      blk_request->domain = current; 
   2.212      
   2.213 @@ -304,16 +319,44 @@ static void dump_blockq(u_char key, void
   2.214  }
   2.215  
   2.216  
   2.217 +/* Start-of-day initialisation for a new domain. */
   2.218 +void init_blkdev_info(struct task_struct *p)
   2.219 +{
   2.220 +    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
   2.221 +    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
   2.222 +    clear_page(p->blk_ring_base);
   2.223 +    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
   2.224 +    p->blkdev_list.next = NULL;
   2.225 +}
   2.226 +
   2.227 +
   2.228 +/* End-of-day teardown for a domain. XXX Outstanding requests? */
   2.229 +void destroy_blkdev_info(struct task_struct *p)
   2.230 +{
   2.231 +    unsigned long flags;
   2.232 +    if ( on_blkdev_list(p) )
   2.233 +    {
   2.234 +        spin_lock_irqsave(&io_schedule_lock, flags);
   2.235 +        if ( on_blkdev_list(p) ) remove_from_blkdev_list(p);
   2.236 +        spin_unlock_irqrestore(&io_schedule_lock, flags);
   2.237 +    }
   2.238 +    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
   2.239 +    free_page((unsigned long)p->blk_ring_base);
   2.240 +}
   2.241 +
   2.242 +
   2.243  void initialize_block_io ()
   2.244  {
   2.245 +    atomic_set(&nr_pending, 0);
   2.246 +
   2.247 +    spin_lock_init(&io_schedule_lock);
   2.248 +    INIT_LIST_HEAD(&io_schedule_list);
   2.249 +
   2.250      blk_request_cachep = kmem_cache_create(
   2.251          "blk_request_cache", sizeof(blk_request_t),
   2.252          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   2.253      
   2.254 -    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats"); 
   2.255 -    
   2.256 -    pending_work = 0;
   2.257 -    atomic_set(&nr_pending, 0);
   2.258 +    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");     
   2.259  }
   2.260  
   2.261  
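
Completion runs in the opposite direction: end_block_io_op() above writes the saved id into resp_ring[resp_prod], bumps resp_prod under blk_ring_lock, and then calls io_schedule() to refill the pipeline. The matching guest-side consumer would look roughly like this (a sketch: resp_cons and the response entry type name are assumed by symmetry with req_cons and blk_ring_req_entry_t; complete_guest_io() is a hypothetical guest callback):

    /* Hypothetical guest-side drain loop, e.g. run from the guest's
     * response upcall handler. */
    static void guest_drain_responses(blk_ring_t *ring)
    {
        while ( ring->resp_cons != ring->resp_prod )
        {
            blk_ring_resp_entry_t *rsp = &ring->resp_ring[ring->resp_cons];
            complete_guest_io(rsp->id, rsp->status);  /* match by id */
            ring->resp_cons = BLK_RESP_RING_INC(ring->resp_cons);
        }
    }
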
     3.1 --- a/xen-2.4.16/include/xeno/blkdev.h	Thu Feb 20 09:51:00 2003 +0000
     3.2 +++ b/xen-2.4.16/include/xeno/blkdev.h	Thu Feb 20 11:04:29 2003 +0000
     3.3 @@ -6,6 +6,7 @@
     3.4  #include <asm/bitops.h>
     3.5  #include <xeno/list.h>
     3.6  #include <xeno/kdev_t.h>
     3.7 +#include <xeno/sched.h>
     3.8  
     3.9  /* Some defines from fs.h that may actually be useful to the blkdev layer. */
    3.10  #define READ 0
    3.11 @@ -14,6 +15,9 @@
    3.12  #define BLOCK_SIZE_BITS 10
    3.13  #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
    3.14  
    3.15 +extern void init_blkdev_info(struct task_struct *);
    3.16 +extern void destroy_blkdev_info(struct task_struct *);
    3.17 +
    3.18  extern int unregister_blkdev(unsigned int, const char *);
    3.19  extern int invalidate_device(kdev_t, int);
    3.20  extern int check_disk_change(kdev_t);
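
Note the new #include <xeno/sched.h> exists only so blkdev.h can name struct task_struct in the two extern declarations. A forward declaration would avoid the header dependency (an alternative sketch, not what this changeset does):

    struct task_struct;    /* forward declaration suffices for pointer args */

    extern void init_blkdev_info(struct task_struct *);
    extern void destroy_blkdev_info(struct task_struct *);
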
     4.1 --- a/xen-2.4.16/include/xeno/sched.h	Thu Feb 20 09:51:00 2003 +0000
     4.2 +++ b/xen-2.4.16/include/xeno/sched.h	Thu Feb 20 11:04:29 2003 +0000
     4.3 @@ -76,6 +76,7 @@ struct task_struct {
     4.4  
     4.5      /* Block I/O */
     4.6      blk_ring_t *blk_ring_base;
     4.7 +    struct list_head blkdev_list;
     4.8      spinlock_t blk_ring_lock;
     4.9  
    4.10      int has_cpu, policy, counter;
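
Finally, sched.h gains the blkdev_list link that the run list threads through. xen_block.c uses a NULL next pointer as the "off-list" sentinel: on_blkdev_list() tests blkdev_list.next != NULL and remove_from_blkdev_list() re-nulls it, so list membership is an O(1) check with no extra flag. A standalone illustration of the same idiom (a sketch using a minimal doubly-linked list in place of <xeno/list.h>):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_add_tail(struct list_head *n, struct list_head *h)
    {
        n->prev = h->prev;  n->next = h;
        h->prev->next = n;  h->prev = n;
    }

    static void list_del(struct list_head *n)
    {
        n->prev->next = n->next;  n->next->prev = n->prev;
    }

    struct task { struct list_head blkdev_list; };

    /* The sentinel idiom: an off-list task keeps next == NULL. */
    static int  on_list(struct task *t)  { return t->blkdev_list.next != NULL; }
    static void off_list(struct task *t) { list_del(&t->blkdev_list);
                                           t->blkdev_list.next = NULL; }

    int main(void)
    {
        struct list_head run = { &run, &run };   /* empty circular list */
        struct task t = { { NULL, NULL } };

        printf("on list? %d\n", on_list(&t));    /* 0 */
        list_add_tail(&t.blkdev_list, &run);
        printf("on list? %d\n", on_list(&t));    /* 1 */
        off_list(&t);
        printf("on list? %d\n", on_list(&t));    /* 0 */
        return 0;
    }
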