ia64/xen-unstable
changeset 214:2e78c0d979ec
bitkeeper revision 1.74 (3e54b63dw5kX3U_MQzXBiMsEyQdHJQ)
sched.h, blkdev.h, xen_block.c, domain.c:
Beginnings of cheesy IO scheduling.
author   | kaf24@labyrinth.cl.cam.ac.uk
date     | Thu Feb 20 11:04:29 2003 +0000 (2003-02-20)
parents  | 3f4a788e4e0d
children | 43f9b7e980ef
files    | xen-2.4.16/common/domain.c xen-2.4.16/drivers/block/xen_block.c xen-2.4.16/include/xeno/blkdev.h xen-2.4.16/include/xeno/sched.h
line diff
--- a/xen-2.4.16/common/domain.c	Thu Feb 20 09:51:00 2003 +0000
+++ b/xen-2.4.16/common/domain.c	Thu Feb 20 11:04:29 2003 +0000
@@ -15,6 +15,7 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <xeno/multiboot.h>
+#include <xeno/blkdev.h>
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
@@ -48,10 +49,7 @@ struct task_struct *do_newdomain(unsigne
     memset(p->shared_info, 0, PAGE_SIZE);
     SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
 
-    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
-    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
-    memset(p->blk_ring_base, 0, PAGE_SIZE);
-    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), dom_id);
+    init_blkdev_info(p);
 
     SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
     SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
@@ -216,8 +214,7 @@ void release_task(struct task_struct *p)
     }
     if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
 
-    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
-    free_page((unsigned long)p->blk_ring_base);
+    destroy_blkdev_info(p);
 
     UNSHARE_PFN(virt_to_page(p->shared_info));
     free_page((unsigned long)p->shared_info);
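The two domain.c hunks simply hide the block-ring page lifecycle behind init_blkdev_info()/destroy_blkdev_info(), which are defined in the xen_block.c hunk below. A rough standalone sketch of that alloc-and-share / unshare-and-free pairing follows; the Xen page-sharing macros are replaced by hypothetical no-op stubs so the sketch compiles on its own.

```c
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Hypothetical stand-ins for SHARE_PFN_WITH_DOMAIN / UNSHARE_PFN. */
static void share_page_with_domain(void *page, unsigned int dom) { (void)page; (void)dom; }
static void unshare_page(void *page)                             { (void)page; }

struct dom_info {
    unsigned int id;
    void *blk_ring_base;   /* one page holding the request/response rings */
};

/* Mirrors init_blkdev_info(): allocate, zero, and share the ring page. */
static void init_blkdev_info(struct dom_info *p)
{
    p->blk_ring_base = malloc(PAGE_SIZE);
    memset(p->blk_ring_base, 0, PAGE_SIZE);
    share_page_with_domain(p->blk_ring_base, p->id);
}

/* Mirrors destroy_blkdev_info(): undo the sharing, then free the page. */
static void destroy_blkdev_info(struct dom_info *p)
{
    unshare_page(p->blk_ring_base);
    free(p->blk_ring_base);
    p->blk_ring_base = NULL;
}

int main(void)
{
    struct dom_info d = { .id = 1 };
    init_blkdev_info(&d);      /* what do_newdomain() now calls */
    destroy_blkdev_info(&d);   /* what release_task() now calls */
    return 0;
}
```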
--- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 09:51:00 2003 +0000
+++ b/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 11:04:29 2003 +0000
@@ -19,30 +19,67 @@
 #define XEN_BLK_DEBUG 0
 #define XEN_BLK_DEBUG_LEVEL KERN_ALERT
 
-/*
- * KAF XXX: the current state of play with blk_requests.
- *
- * The following infrastructure is really here for future use.
- * blk_requests are currently not used by any mechanism, but eventually
- * pending blk_requests will go into an IO scheduler. This entry point
- * will go where we currently increment 'nr_pending'. The scheduler will
- * refuse admission of a blk_request if it is already full.
- */
 typedef struct blk_request {
-    struct list_head queue;
-    struct buffer_head *bh;
-    blk_ring_req_entry_t *request;
-    struct task_struct *domain;              /* requesting domain */
+    struct buffer_head *bh;
+    void *id;
+    struct task_struct *domain;
 } blk_request_t;
-#define MAX_PENDING_REQS 256                 /* very arbitrary */
+#define MAX_PENDING_REQS 32
+#define BATCH_PER_DOMAIN 8
 static kmem_cache_t *blk_request_cachep;
 static atomic_t nr_pending;
-static int pending_work;    /* Bitmask: which domains have work for us? */
+
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int dispatch_rw_block_io(int index);
+static int dispatch_probe_block_io(int index);
+static int dispatch_debug_block_io(int index);
+
+static spinlock_t io_schedule_lock;
+static struct list_head io_schedule_list;
+
+static int on_blkdev_list(struct task_struct *p)
+{
+    return p->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(struct task_struct *p)
+{
+    list_del(&p->blkdev_list);
+    p->blkdev_list.next = NULL;
+}
+
+static void add_to_blkdev_list(struct task_struct *p)
+{
+    list_add(&p->blkdev_list, &io_schedule_list);
+}
 
-static long do_block_io_op_domain (struct task_struct* task);
-static int dispatch_rw_block_io (int index);
-static int dispatch_probe_block_io (int index);
-static int dispatch_debug_block_io (int index);
+static void add_to_blkdev_list_tail(struct task_struct *p)
+{
+    list_add_tail(&p->blkdev_list, &io_schedule_list);
+}
+
+static void io_schedule(void)
+{
+    struct task_struct *p;
+    struct list_head *ent;
+
+    while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) &&
+            !list_empty(&io_schedule_list) &&
+            spin_trylock(&io_schedule_lock) )
+    {
+        while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
+                !list_empty(&io_schedule_list) )
+        {
+            ent = io_schedule_list.next;
+            p = list_entry(ent, struct task_struct, blkdev_list);
+            remove_from_blkdev_list(p);
+            if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(p);
+        }
+        spin_unlock(&io_schedule_lock);
+    }
+}
+
 
 /*
  * end_block_io_op:
@@ -58,7 +95,6 @@ void end_block_io_op(struct buffer_head
     struct task_struct *p;
     int position = 0;
     blk_ring_t *blk_ring;
-    int loop;
 
     if (XEN_BLK_DEBUG)
         printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op, bh: %lx\n",
@@ -74,7 +110,7 @@ void end_block_io_op(struct buffer_head
     spin_lock_irqsave(&p->blk_ring_lock, flags);
     blk_ring = p->blk_ring_base;
     position = blk_ring->resp_prod;
-    blk_ring->resp_ring[position].id = blk_request->request->id;
+    blk_ring->resp_ring[position].id = blk_request->id;
     blk_ring->resp_ring[position].status = 0;
     blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
     spin_unlock_irqrestore(&p->blk_ring_lock, flags);
@@ -87,27 +123,9 @@ void end_block_io_op(struct buffer_head
     if ( blk_request->bh )
         kfree(blk_request->bh);
     kmem_cache_free(blk_request_cachep, blk_request);
-
-    /* XXX SMH: below is ugly and dangerous -- fix */
-    /*
-     * now check if there is any pending work from any domain
-     * that we were previously unable to process.
-     */
-    for ( loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++ )
-    {
-        int domain = pending_work & (1 << loop);
-
-        if ( domain )
-        {
-            struct task_struct *mytask = current;
-
-            while ( mytask->domain != loop )
-                mytask = mytask->next_task;
-
-            pending_work = pending_work & !(1 << loop);
-            do_block_io_op_domain(mytask);
-        }
-    }
+    /* Get more work to do. */
+    io_schedule();
 
     return;
 
@@ -125,21 +143,25 @@ void end_block_io_op(struct buffer_head
  * Accept a block io request from a guest operating system.
  * There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
  */
+long do_block_io_op(void)
+{
+    if ( !on_blkdev_list(current) )
+    {
+        spin_lock_irq(&io_schedule_lock);
+        add_to_blkdev_list_tail(current);
+        spin_unlock_irq(&io_schedule_lock);
+    }
 
-long do_block_io_op (void)
-{
-    return do_block_io_op_domain(current);
+    io_schedule();
+
+    return 0L;
 }
 
 
-/*
- * do_block_io_op_domain:
- * Handle the requests for a particular domain
- */
-static long do_block_io_op_domain (struct task_struct* task)
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
 {
     blk_ring_t *blk_ring = task->blk_ring_base;
-    int loop, status;
+    int loop, status = 0;
 
     if (XEN_BLK_DEBUG)
         printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
@@ -151,6 +173,8 @@ static long do_block_io_op_domain (struc
     {
         status = 1;
 
+        if ( max_to_do-- == 0 ) break;
+
         switch (blk_ring->req_ring[loop].operation) {
 
         case XEN_BLOCK_READ:
@@ -172,20 +196,11 @@ static long do_block_io_op_domain (struc
             BUG();
         }
 
-
-        if (status) {
-            /*
-            ** Unable to successfully issue / complete command, maybe because
-            ** another resource (e.g. disk request buffers) is unavailable.
-            ** stop removing items from the communications ring and try later
-            */
-            pending_work = pending_work | (1 << task->domain);
-            break;
-        }
+        if ( status ) break;
     }
 
     blk_ring->req_cons = loop;
-    return 0L;
+    return status;
 }
 
 
@@ -284,7 +299,7 @@ static int dispatch_rw_block_io (int ind
     }
 
     /* save meta data about request */
-    blk_request->request = &blk_ring->req_ring[index];
+    blk_request->id = blk_ring->req_ring[index].id;
    blk_request->bh = bh;
    blk_request->domain = current;
 
@@ -304,16 +319,44 @@ static void dump_blockq(u_char key, void
 }
 
 
+/* Start-of-day initialisation for a new domain. */
+void init_blkdev_info(struct task_struct *p)
+{
+    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
+    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
+    clear_page(p->blk_ring_base);
+    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
+    p->blkdev_list.next = NULL;
+}
+
+
+/* End-of-day teardown for a domain. XXX Outstanding requests? */
+void destroy_blkdev_info(struct task_struct *p)
+{
+    unsigned long flags;
+    if ( on_blkdev_list(p) )
+    {
+        spin_lock_irqsave(&io_schedule_lock, flags);
+        if ( on_blkdev_list(p) ) remove_from_blkdev_list(p);
+        spin_unlock_irqrestore(&io_schedule_lock, flags);
+    }
+    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
+    free_page((unsigned long)p->blk_ring_base);
+}
+
+
 void initialize_block_io ()
 {
+    atomic_set(&nr_pending, 0);
+
+    spin_lock_init(&io_schedule_lock);
+    INIT_LIST_HEAD(&io_schedule_list);
+
     blk_request_cachep = kmem_cache_create(
         "blk_request_cache", sizeof(blk_request_t),
         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 
-    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
-
-    pending_work = 0;
-    atomic_set(&nr_pending, 0);
+    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
 }
 
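The core of the new scheme is io_schedule() above: domains with outstanding ring entries sit on io_schedule_list, each pass takes the domain at the head, lets it issue at most BATCH_PER_DOMAIN requests, and re-queues it at the tail if work remains, with MAX_PENDING_REQS capping the requests in flight. Below is a minimal, self-contained C sketch of that batched round-robin discipline; the types, queue helpers, and printf tracing are hypothetical simplifications (no spinlocks, no real ring), not the hypervisor code itself.

```c
#include <stdio.h>

#define MAX_PENDING_REQS 32
#define BATCH_PER_DOMAIN  8

/* Hypothetical stand-in for a domain with queued block requests. */
struct dom {
    int id;
    int queued;          /* requests still waiting on its ring */
    struct dom *next;    /* FIFO link, playing the role of blkdev_list */
};

static struct dom *head, *tail;   /* io_schedule_list equivalent */
static int nr_pending;            /* requests currently issued to the driver */

static void add_to_tail(struct dom *p)
{
    p->next = NULL;
    if ( tail ) tail->next = p; else head = p;
    tail = p;
}

static struct dom *remove_from_head(void)
{
    struct dom *p = head;
    head = p->next;
    if ( !head ) tail = NULL;
    return p;
}

/* Issue up to max_to_do requests for one domain; nonzero => work remains. */
static int do_block_io_op_domain(struct dom *p, int max_to_do)
{
    while ( p->queued && (max_to_do-- != 0) && (nr_pending < MAX_PENDING_REQS) )
    {
        p->queued--;
        nr_pending++;
        printf("issued one request for dom %d (pending=%d)\n", p->id, nr_pending);
    }
    return p->queued != 0;
}

/* One scheduling pass, as in io_schedule(): batch, then requeue at the tail. */
static void io_schedule(void)
{
    while ( (nr_pending < MAX_PENDING_REQS) && (head != NULL) )
    {
        struct dom *p = remove_from_head();
        if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
            add_to_tail(p);
    }
}

int main(void)
{
    struct dom d1 = { .id = 1, .queued = 12 };
    struct dom d2 = { .id = 2, .queued = 3 };
    add_to_tail(&d1);
    add_to_tail(&d2);
    io_schedule();   /* dom 1 gets a batch of 8, dom 2 its 3, then dom 1 its last 4 */
    return 0;
}
```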
--- a/xen-2.4.16/include/xeno/blkdev.h	Thu Feb 20 09:51:00 2003 +0000
+++ b/xen-2.4.16/include/xeno/blkdev.h	Thu Feb 20 11:04:29 2003 +0000
@@ -6,6 +6,7 @@
 #include <asm/bitops.h>
 #include <xeno/list.h>
 #include <xeno/kdev_t.h>
+#include <xeno/sched.h>
 
 /* Some defines from fs.h that may actually be useful to the blkdev layer. */
 #define READ 0
@@ -14,6 +15,9 @@
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
 
+extern void init_blkdev_info(struct task_struct *);
+extern void destroy_blkdev_info(struct task_struct *);
+
 extern int unregister_blkdev(unsigned int, const char *);
 extern int invalidate_device(kdev_t, int);
 extern int check_disk_change(kdev_t);
--- a/xen-2.4.16/include/xeno/sched.h	Thu Feb 20 09:51:00 2003 +0000
+++ b/xen-2.4.16/include/xeno/sched.h	Thu Feb 20 11:04:29 2003 +0000
@@ -76,6 +76,7 @@ struct task_struct {
 
     /* Block I/O */
     blk_ring_t *blk_ring_base;
+    struct list_head blkdev_list;
     spinlock_t blk_ring_lock;
 
     int has_cpu, policy, counter;
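The sched.h change is just the hook the scheduler list hangs off: a list_head embedded directly in task_struct, so io_schedule() can get from a list node back to the owning task with list_entry(), and on_blkdev_list() can test membership by checking next against NULL. A standalone sketch of that intrusive-list idiom follows, with list_entry() reduced to the usual offsetof() trick; the struct layout here is a hypothetical cut-down task_struct, not the real one.

```c
#include <stddef.h>
#include <stdio.h>

/* Minimal intrusive list node, in the style of <xeno/list.h>. */
struct list_head {
    struct list_head *next, *prev;
};

/* list_entry(): map a node pointer back to the structure embedding it. */
#define list_entry(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct task_struct {
    int domain;
    struct list_head blkdev_list;   /* next == NULL <=> not on io_schedule_list */
};

int main(void)
{
    struct task_struct t = { .domain = 3 };   /* blkdev_list zeroed => off-list */
    struct list_head *ent = &t.blkdev_list;

    /* What io_schedule() does after pulling a node off io_schedule_list: */
    struct task_struct *p = list_entry(ent, struct task_struct, blkdev_list);
    printf("recovered task for domain %d\n", p->domain);   /* prints 3 */
    return 0;
}
```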