
view linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c @ 4943:1fa301443996

bitkeeper revision 1.1423 (428900e16Uxi9JzPfHD98q7NuH29dg)

Merge firebug.cl.cam.ac.uk:/local/scratch/kaf24/xen-2.0-testing.bk
into firebug.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk
author kaf24@firebug.cl.cam.ac.uk
date Mon May 16 20:21:53 2005 +0000 (2005-05-16)
parents d16ae85cb89e 85e3c42fd78f
children 04626a6c01f1
/******************************************************************************
 * arch/xen/drivers/blkif/backend/main.c
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  arch/xen/drivers/blkif/frontend
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 */

#include "common.h"
#include <asm-xen/evtchn.h>
#ifdef CONFIG_XEN_BLKDEV_GRANT
#include <asm-xen/xen-public/grant_table.h>
#endif

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
#define MAX_PENDING_REQS 64
#define BATCH_PER_DOMAIN 16

static unsigned long mmap_vstart;
#define MMAP_PAGES                                              \
    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_req,_seg)                                   \
    (mmap_vstart +                                              \
     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
     ((_seg) * PAGE_SIZE))
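/*
 * The region at mmap_vstart provides one page of backend virtual address
 * space for every possible segment of every pending request (MMAP_PAGES
 * pages in total). MMAP_VADDR(_req,_seg) returns the page into which
 * segment _seg of pending request _req is mapped.
 */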
/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each buffer_head that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
    blkif_t       *blkif;
    unsigned long  id;
    int            nr_pages;
    atomic_t       pendcnt;
    unsigned short operation;
    int            status;
} pending_req_t;

/*
 * We can't allocate pending_req's in order, since they may complete out of
 * order. We therefore maintain an allocation ring. This ring also indicates
 * when enough work has been passed down -- at that point the allocation ring
 * will be empty.
 */
static pending_req_t pending_reqs[MAX_PENDING_REQS];
static unsigned char pending_ring[MAX_PENDING_REQS];
static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
/* NB. We use a different index type to differentiate from shared blk rings. */
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
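/*
 * pending_ring holds the indices of free slots in pending_reqs[]. Request
 * dispatch consumes an index and advances pending_cons; request completion
 * returns the index and advances pending_prod. NR_PENDING_REQS is therefore
 * the number of requests currently in flight.
 */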
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
static kmem_cache_t *buffer_head_cachep;
#else
static request_queue_t *plugged_queue;
static inline void flush_plugged_queue(void)
{
    request_queue_t *q = plugged_queue;
    if ( q != NULL )
    {
        if ( q->unplug_fn != NULL )
            q->unplug_fn(q);
        blk_put_queue(q);
        plugged_queue = NULL;
    }
}
#endif

#ifdef CONFIG_XEN_BLKDEV_GRANT
/* When using grant tables to map a frame for device access then the
 * handle returned must be used to unmap the frame. This is needed to
 * drop the ref count on the frame.
 */
static u16 pending_grant_handles[MMAP_PAGES];
#define pending_handle(_idx, _i) \
    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
#define BLKBACK_INVALID_HANDLE (0xFFFF)
#endif

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
 * If the tap driver is used, we may get pages belonging to either the tap
 * or (more likely) the real frontend. The backend must specify which domain
 * a given page belongs to in update_va_mapping though. For the moment,
 * the tap rewrites the ID field of the request to contain the request index
 * and the id of the real front end domain.
 */
#define BLKTAP_COOKIE 0xbeadfeed
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
#endif

static int do_block_io_op(blkif_t *blkif, int max_to_do);
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
static void make_response(blkif_t *blkif, unsigned long id,
                          unsigned short op, int st);
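/*
 * fast_flush_area() tears down the backend mappings set up for pending
 * request 'idx': with grant tables it unmaps each segment that still holds a
 * valid grant handle; otherwise it zaps the corresponding PTEs with a single
 * multicall, flushing the TLB on the final entry.
 */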
static void fast_flush_area(int idx, int nr_pages)
{
#ifdef CONFIG_XEN_BLKDEV_GRANT
    gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    unsigned int i, invcount = 0;
    u16 handle;

    for ( i = 0; i < nr_pages; i++ )
    {
        if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
        {
            /* Pack valid unmap ops contiguously at the start of aop[]. */
            aop[invcount].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i);
            aop[invcount].u.unmap_grant_ref.dev_bus_addr   = 0;
            aop[invcount].u.unmap_grant_ref.handle         = handle;
            pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
            invcount++;
        }
    }
    if ( unlikely(HYPERVISOR_grant_table_op(
                      GNTTABOP_unmap_grant_ref, aop, invcount)))
        BUG();
#else

    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int               i;

    for ( i = 0; i < nr_pages; i++ )
    {
        mcl[i].op      = __HYPERVISOR_update_va_mapping;
        mcl[i].args[0] = MMAP_VADDR(idx, i);
        mcl[i].args[1] = 0;
        mcl[i].args[2] = 0;
    }

    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
        BUG();
#endif
}


/******************************************************************
 * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
 */

static struct list_head blkio_schedule_list;
static spinlock_t blkio_schedule_list_lock;

static int __on_blkdev_list(blkif_t *blkif)
{
    return blkif->blkdev_list.next != NULL;
}

static void remove_from_blkdev_list(blkif_t *blkif)
{
    unsigned long flags;
    if ( !__on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( __on_blkdev_list(blkif) )
    {
        list_del(&blkif->blkdev_list);
        blkif->blkdev_list.next = NULL;
        blkif_put(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

static void add_to_blkdev_list_tail(blkif_t *blkif)
{
    unsigned long flags;
    if ( __on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
    {
        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
        blkif_get(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}


/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
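/*
 * blkio_schedule() runs as a kernel thread ("xenblkd" on 2.6). It sleeps
 * until there is both a connected interface with queued work and a free
 * pending_req slot, then round-robins over the schedule list, pulling at
 * most BATCH_PER_DOMAIN requests from each interface before pushing the
 * whole batch through to the disc.
 */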
static int blkio_schedule(void *arg)
{
    DECLARE_WAITQUEUE(wq, current);

    blkif_t          *blkif;
    struct list_head *ent;

    daemonize(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
        "xenblkd"
#endif
        );

    for ( ; ; )
    {
        /* Wait for work to do. */
        add_wait_queue(&blkio_schedule_wait, &wq);
        set_current_state(TASK_INTERRUPTIBLE);
        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
             list_empty(&blkio_schedule_list) )
            schedule();
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&blkio_schedule_wait, &wq);

        /* Queue up a batch of requests. */
        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
                !list_empty(&blkio_schedule_list) )
        {
            ent = blkio_schedule_list.next;
            blkif = list_entry(ent, blkif_t, blkdev_list);
            blkif_get(blkif);
            remove_from_blkdev_list(blkif);
            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
                add_to_blkdev_list_tail(blkif);
            blkif_put(blkif);
        }

        /* Push the batch through to disc. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
        run_task_queue(&tq_disk);
#else
        flush_plugged_queue();
#endif
    }
}

static void maybe_trigger_blkio_schedule(void)
{
    /*
     * Needed so that two processes, who together make the following predicate
     * true, don't both read stale values and evaluate the predicate
     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
     */
    smp_mb();

    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
         !list_empty(&blkio_schedule_list) )
        wake_up(&blkio_schedule_wait);
}


/******************************************************************
 * COMPLETION CALLBACK -- Called as bh->b_end_io()
 */

static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
    unsigned long flags;

    /* An error fails the entire request. */
    if ( !uptodate )
    {
        DPRINTK("Buffer not up-to-date at end of operation\n");
        pending_req->status = BLKIF_RSP_ERROR;
    }

    if ( atomic_dec_and_test(&pending_req->pendcnt) )
    {
        int pending_idx = pending_req - pending_reqs;
        fast_flush_area(pending_idx, pending_req->nr_pages);
        make_response(pending_req->blkif, pending_req->id,
                      pending_req->operation, pending_req->status);
        blkif_put(pending_req->blkif);
        spin_lock_irqsave(&pend_prod_lock, flags);
        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
        spin_unlock_irqrestore(&pend_prod_lock, flags);
        maybe_trigger_blkio_schedule();
    }
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
    __end_block_io_op(bh->b_private, uptodate);
    kmem_cache_free(buffer_head_cachep, bh);
}
#else
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
    if ( bio->bi_size != 0 )
        return 1;
    __end_block_io_op(bio->bi_private, !error);
    bio_put(bio);
    return error;
}
#endif


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
    blkif_t *blkif = dev_id;
    add_to_blkdev_list_tail(blkif);
    maybe_trigger_blkio_schedule();
    return IRQ_HANDLED;
}


/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */
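/*
 * do_block_io_op() consumes up to 'max_to_do' requests from 'blkif's shared
 * ring and dispatches each one. It returns non-zero if it stopped early
 * (batch limit reached or no free pending_req slots), i.e. if the interface
 * should be rescheduled.
 */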
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
    blkif_request_t *req;
    RING_IDX i, rp;
    int more_to_do = 0;

    rp = blk_ring->sring->req_prod;
    rmb(); /* Ensure we see queued requests up to 'rp'. */

    for ( i = blk_ring->req_cons;
          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
          i++ )
    {
        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
        {
            more_to_do = 1;
            break;
        }

        req = RING_GET_REQUEST(blk_ring, i);
        switch ( req->operation )
        {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
            dispatch_rw_block_io(blkif, req);
            break;

        case BLKIF_OP_PROBE:
            dispatch_probe(blkif, req);
            break;

        default:
            DPRINTK("error: unknown block io operation [%d]\n",
                    req->operation);
            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
            break;
        }
    }

    blk_ring->req_cons = i;
    return more_to_do;
}
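/*
 * dispatch_probe() services a BLKIF_OP_PROBE request: it maps the single
 * page supplied by the frontend, asks vbd_probe() to fill it with vdisk_t
 * records describing the domain's virtual disks, then unmaps the page and
 * queues the response.
 */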
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
{
    int rsp = BLKIF_RSP_ERROR;
    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

    /* We expect one buffer only. */
    if ( unlikely(req->nr_segments != 1) )
        goto out;

    /* Make sure the buffer is page-sized. */
    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
        goto out;

#ifdef CONFIG_XEN_BLKDEV_GRANT
    {
        gnttab_op_t op;

        op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0);
        op.u.map_grant_ref.flags = GNTMAP_host_map;
        op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
        op.u.map_grant_ref.dom = blkif->domid;

        if ( unlikely(HYPERVISOR_grant_table_op(
                          GNTTABOP_map_grant_ref, &op, 1)))
            BUG();

        if ( op.u.map_grant_ref.handle < 0 )
            goto out;

        pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle;
    }
#else /* else CONFIG_XEN_BLKDEV_GRANT */

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    /* Grab the real frontend out of the probe message. */
    if ( req->frame_and_sects[1] == BLKTAP_COOKIE )
        blkif->is_blktap = 1;
#endif

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    if ( HYPERVISOR_update_va_mapping_otherdomain(
             MMAP_VADDR(pending_idx, 0),
             (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
             0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
        goto out;
#else
    if ( HYPERVISOR_update_va_mapping_otherdomain(
             MMAP_VADDR(pending_idx, 0),
             (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
             0, blkif->domid) )
        goto out;
#endif
#endif /* endif CONFIG_XEN_BLKDEV_GRANT */

    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
                    PAGE_SIZE / sizeof(vdisk_t));

 out:
    fast_flush_area(pending_idx, 1);
    make_response(blkif, req->id, req->operation, rsp);
}
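/*
 * dispatch_rw_block_io() handles a read or write request. It maps every
 * segment of the request into the backend's address space (via grant-table
 * operations or update_va_mapping_otherdomain), translates the virtual
 * device and sector through vbd_translate(), claims a pending_req, and then
 * submits the data as buffer_heads (2.4) or bios (2.6). Completion is
 * reported asynchronously through end_block_io_op().
 */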
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
    unsigned long fas = 0;
    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
    pending_req_t *pending_req;
#ifdef CONFIG_XEN_BLKDEV_GRANT
    gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
#else
    unsigned long remap_prot;
    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
#endif
    struct phys_req preq;
    struct {
        unsigned long buf; unsigned int nsec;
    } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    unsigned int nseg;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    struct buffer_head *bh;
#else
    struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int nbio = 0;
    request_queue_t *q;
#endif

    /* Check that number of segments is sane. */
    nseg = req->nr_segments;
    if ( unlikely(nseg == 0) ||
         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
    {
        DPRINTK("Bad number of segments in request (%d)\n", nseg);
        goto bad_descriptor;
    }

    preq.dev           = req->device;
    preq.sector_number = req->sector_number;
    preq.nr_sects      = 0;

#ifdef CONFIG_XEN_BLKDEV_GRANT
    for ( i = 0; i < nseg; i++ )
    {
        fas         = req->frame_and_sects[i];
        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;

        if ( seg[i].nsec <= 0 )
            goto bad_descriptor;
        preq.nr_sects += seg[i].nsec;

        aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i);
        aop[i].u.map_grant_ref.dom = blkif->domid;
        aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas);
        aop[i].u.map_grant_ref.flags = GNTMAP_host_map;
        if ( operation == WRITE )
            aop[i].u.map_grant_ref.flags |= GNTMAP_readonly;
    }

    if ( unlikely(HYPERVISOR_grant_table_op(
                      GNTTABOP_map_grant_ref, aop, nseg)))
        BUG();

    for ( i = 0; i < nseg; i++ )
    {
        if ( unlikely(aop[i].u.map_grant_ref.handle < 0) )
        {
            DPRINTK("invalid buffer -- could not remap it\n");
            fast_flush_area(pending_idx, nseg);
            goto bad_descriptor;
        }

        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
            FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);

        pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle;
    }
#endif

    for ( i = 0; i < nseg; i++ )
    {
        fas         = req->frame_and_sects[i];
#ifdef CONFIG_XEN_BLKDEV_GRANT
        seg[i].buf  = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) |
                      (blkif_first_sect(fas) << 9);
#else
        seg[i].buf  = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
        if ( seg[i].nsec <= 0 )
            goto bad_descriptor;
        preq.nr_sects += seg[i].nsec;
#endif
    }

    if ( vbd_translate(&preq, blkif, operation) != 0 )
    {
        DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
                operation == READ ? "read" : "write", preq.sector_number,
                preq.sector_number + preq.nr_sects, preq.dev);
        goto bad_descriptor;
    }

#ifndef CONFIG_XEN_BLKDEV_GRANT
    if ( operation == READ )
        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
    else
        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;

    for ( i = 0; i < nseg; i++ )
    {
        mcl[i].op      = __HYPERVISOR_update_va_mapping_otherdomain;
        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
        mcl[i].args[1] = (seg[i].buf & PAGE_MASK) | remap_prot;
        mcl[i].args[2] = 0;
        mcl[i].args[3] = blkif->domid;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
        if ( blkif->is_blktap )
            mcl[i].args[3] = ID_TO_DOM(req->id);
#endif
        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
            FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
    }

    BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);

    for ( i = 0; i < nseg; i++ )
    {
        if ( unlikely(mcl[i].args[5] != 0) )
        {
            DPRINTK("invalid buffer -- could not remap it\n");
            fast_flush_area(pending_idx, nseg);
            goto bad_descriptor;
        }
    }
#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
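    /* All segments are now mapped: record the request so the completion
     * callback can find it, then hand the data to the block layer below. */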
    pending_req = &pending_reqs[pending_idx];
    pending_req->blkif     = blkif;
    pending_req->id        = req->id;
    pending_req->operation = operation;
    pending_req->status    = BLKIF_RSP_OKAY;
    pending_req->nr_pages  = nseg;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)

    atomic_set(&pending_req->pendcnt, nseg);
    pending_cons++;
    blkif_get(blkif);

    for ( i = 0; i < nseg; i++ )
    {
        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
        if ( unlikely(bh == NULL) )
        {
            __end_block_io_op(pending_req, 0);
            continue;
        }

        memset(bh, 0, sizeof (struct buffer_head));

        init_waitqueue_head(&bh->b_wait);
        bh->b_size    = seg[i].nsec << 9;
        bh->b_dev     = preq.dev;
        bh->b_rdev    = preq.dev;
        bh->b_rsector = (unsigned long)preq.sector_number;
        bh->b_data    = (char *)MMAP_VADDR(pending_idx, i) +
            (seg[i].buf & ~PAGE_MASK);
        bh->b_page    = virt_to_page(MMAP_VADDR(pending_idx, i));
        bh->b_end_io  = end_block_io_op;
        bh->b_private = pending_req;

        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
            (1 << BH_Req) | (1 << BH_Launder);
        if ( operation == WRITE )
            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);

        atomic_set(&bh->b_count, 1);

        /* Dispatch a single request. We'll flush it to disc later. */
        generic_make_request(operation, bh);

        preq.sector_number += seg[i].nsec;
    }

#else

    for ( i = 0; i < nseg; i++ )
    {
        while ( (bio == NULL) ||
                (bio_add_page(bio,
                              virt_to_page(MMAP_VADDR(pending_idx, i)),
                              seg[i].nsec << 9,
                              seg[i].buf & ~PAGE_MASK) == 0) )
        {
            bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
            if ( unlikely(bio == NULL) )
            {
                for ( i = 0; i < (nbio-1); i++ )
                    bio_put(biolist[i]);
                fast_flush_area(pending_idx, nseg);
                goto bad_descriptor;
            }

            bio->bi_bdev    = preq.bdev;
            bio->bi_private = pending_req;
            bio->bi_end_io  = end_block_io_op;
            bio->bi_sector  = preq.sector_number;
        }

        preq.sector_number += seg[i].nsec;
    }

    if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
    {
        flush_plugged_queue();
        blk_get_queue(q);
        plugged_queue = q;
    }

    atomic_set(&pending_req->pendcnt, nbio);
    pending_cons++;
    blkif_get(blkif);

    for ( i = 0; i < nbio; i++ )
        submit_bio(operation, biolist[i]);

#endif

    return;

 bad_descriptor:
    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}


/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */
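/*
 * make_response() places a response on the frontend's shared ring under the
 * per-interface ring lock and then notifies the frontend over its event
 * channel.
 */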
static void make_response(blkif_t *blkif, unsigned long id,
                          unsigned short op, int st)
{
    blkif_response_t  *resp;
    unsigned long      flags;
    blkif_back_ring_t *blk_ring = &blkif->blk_ring;

    /* Place on the response ring for the relevant domain. */
    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
    resp->id        = id;
    resp->operation = op;
    resp->status    = st;
    wmb(); /* Ensure other side can see the response fields. */
    blk_ring->rsp_prod_pvt++;
    RING_PUSH_RESPONSES(blk_ring);
    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

    /* Kick the relevant domain. */
    notify_via_evtchn(blkif->evtchn);
}

void blkif_deschedule(blkif_t *blkif)
{
    remove_from_blkdev_list(blkif);
}
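/*
 * blkif_init() runs at module init in the driver domain only. It reserves
 * the MMAP_PAGES mapping region, marks every pending_req slot free, starts
 * the scheduler thread, and registers the control interface.
 */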
static int __init blkif_init(void)
{
    int i;

    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
         !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
        return 0;

    blkif_interface_init();

    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
        BUG();

    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    memset(pending_reqs, 0, sizeof(pending_reqs));
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;

    spin_lock_init(&blkio_schedule_list_lock);
    INIT_LIST_HEAD(&blkio_schedule_list);

    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
        BUG();

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    buffer_head_cachep = kmem_cache_create(
        "buffer_head_cache", sizeof(struct buffer_head),
        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
#endif

    blkif_ctrlif_init();

#ifdef CONFIG_XEN_BLKDEV_GRANT
    /* Mark all grant handles as initially invalid. */
    for ( i = 0; i < MMAP_PAGES; i++ )
        pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
    printk(KERN_ALERT "Blkif backend is using grant tables.\n");
#endif

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
#endif

    return 0;
}

__initcall(blkif_init);