ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c @ 8623:6ce7c026320e

Return -ENOMEM instead of -1 when running out of memory.

Signed-off-by: Vincent Hanquez <vincent@xensource.com>
author vhanquez@kneesa.uk.xensource.com
date Mon Jan 16 23:54:24 2006 +0000 (2006-01-16)
parents 994e8ee5179d
children fd9b2c1bb577
line source
/******************************************************************************
 * arch/xen/drivers/blkif/backend/main.c
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  arch/xen/drivers/blkif/frontend
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <asm-xen/balloon.h>
#include <asm/hypervisor.h>
#include "common.h"

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
static int mmap_pages;

static int __init set_blkif_reqs(char *str)
{
        get_option(&str, &blkif_reqs);
        return 1;
}
__setup("blkif_reqs=", set_blkif_reqs);
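/*
 * __setup() above registers "blkif_reqs=" as a kernel command-line option
 * for the domain running the backend; e.g. booting with "blkif_reqs=128"
 * doubles the pending-request pool (128 is purely an illustrative value,
 * not a recommendation).
 */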

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
        blkif_t       *blkif;
        unsigned long  id;
        int            nr_pages;
        atomic_t       pendcnt;
        unsigned short operation;
        int            status;
        struct list_head free_list;
} pending_req_t;

static pending_req_t *pending_reqs;
static struct list_head pending_free;
static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);

#define BLKBACK_INVALID_HANDLE (~0)

static unsigned long mmap_vstart;
static unsigned long *pending_vaddrs;
static grant_handle_t *pending_grant_handles;

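/*
 * The segments of a given request occupy a contiguous range of slots in
 * pending_vaddrs[] and pending_grant_handles[], starting at
 * (request index) * BLKIF_MAX_SEGMENTS_PER_REQUEST.  The helpers below
 * compute that flat index and look up the mapped virtual address or grant
 * handle for a (request, segment) pair.
 */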
static inline int vaddr_pagenr(pending_req_t *req, int seg)
{
        return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

static inline unsigned long vaddr(pending_req_t *req, int seg)
{
        return pending_vaddrs[vaddr_pagenr(req, seg)];
}

#define pending_handle(_req, _seg) \
        (pending_grant_handles[vaddr_pagenr(_req, _seg)])

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
 * If the tap driver is used, we may get pages belonging to either the tap
 * or (more likely) the real frontend. The backend must specify which domain
 * a given page belongs to in update_va_mapping though. For the moment,
 * the tap rewrites the ID field of the request to contain the request index
 * and the id of the real front end domain.
 */
#define BLKTAP_COOKIE 0xbeadfeed
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
#endif

static int do_block_io_op(blkif_t *blkif);
static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
                                 pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id,
                          unsigned short op, int st);

/******************************************************************
 * misc small helpers
 */
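/* Take a pending_req off the free list, or return NULL if none is available. */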
static pending_req_t* alloc_req(void)
{
        pending_req_t *req = NULL;
        unsigned long flags;

        spin_lock_irqsave(&pending_free_lock, flags);
        if (!list_empty(&pending_free)) {
                req = list_entry(pending_free.next, pending_req_t, free_list);
                list_del(&req->free_list);
        }
        spin_unlock_irqrestore(&pending_free_lock, flags);
        return req;
}

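/* Return a pending_req to the free list, waking waiters if the list was empty. */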
static void free_req(pending_req_t *req)
{
        unsigned long flags;
        int was_empty;

        spin_lock_irqsave(&pending_free_lock, flags);
        was_empty = list_empty(&pending_free);
        list_add(&req->free_list, &pending_free);
        spin_unlock_irqrestore(&pending_free_lock, flags);
        if (was_empty)
                wake_up(&pending_free_wq);
}

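/*
 * Remember at most one plugged request queue per blkif so that a batch of
 * bios can be submitted before the queue is unplugged (kicked) exactly once.
 */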
static void unplug_queue(blkif_t *blkif)
{
        if (blkif->plug == NULL)
                return;
        if (blkif->plug->unplug_fn)
                blkif->plug->unplug_fn(blkif->plug);
        blk_put_queue(blkif->plug);
        blkif->plug = NULL;
}

static void plug_queue(blkif_t *blkif, struct bio *bio)
{
        request_queue_t *q = bdev_get_queue(bio->bi_bdev);

        if (q == blkif->plug)
                return;
        unplug_queue(blkif);
        blk_get_queue(q);
        blkif->plug = q;
}

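/*
 * Unmap every grant still mapped for this request and invalidate the
 * corresponding handles, returning the frames to the frontend.
 */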
static void fast_flush_area(pending_req_t *req)
{
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
        grant_handle_t handle;
        int ret;

        for (i = 0; i < req->nr_pages; i++) {
                handle = pending_handle(req, i);
                if (handle == BLKBACK_INVALID_HANDLE)
                        continue;
                unmap[invcount].host_addr    = vaddr(req, i);
                unmap[invcount].dev_bus_addr = 0;
                unmap[invcount].handle       = handle;
                pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
                invcount++;
        }

        ret = HYPERVISOR_grant_table_op(
                GNTTABOP_unmap_grant_ref, unmap, invcount);
        BUG_ON(ret);
}

/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static void print_stats(blkif_t *blkif)
{
        printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
               current->comm, blkif->st_oo_req,
               blkif->st_rd_req, blkif->st_wr_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
}

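/*
 * Main loop of the per-interface kernel thread: wait until there is work
 * (or a pending_req frees up), consume ring requests via do_block_io_op(),
 * and exit once kthread_stop() has been called and no I/O remains pending.
 */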
int blkif_schedule(void *arg)
{
        blkif_t *blkif = arg;

        blkif_get(blkif);
        if (debug_lvl)
                printk(KERN_DEBUG "%s: started\n", current->comm);
        for (;;) {
                if (kthread_should_stop()) {
                        /* asked to quit? */
                        if (!atomic_read(&blkif->io_pending))
                                break;
                        if (debug_lvl)
                                printk(KERN_DEBUG "%s: I/O pending, "
                                       "delaying exit\n", current->comm);
                }

                if (!atomic_read(&blkif->io_pending)) {
                        /* Wait for work to do. */
                        wait_event_interruptible(
                                blkif->wq,
                                (atomic_read(&blkif->io_pending) ||
                                 kthread_should_stop()));
                } else if (list_empty(&pending_free)) {
                        /* Wait for a pending_req to become available. */
                        wait_event_interruptible(
                                pending_free_wq,
                                !list_empty(&pending_free));
                }

                if (blkif->status != CONNECTED) {
                        /* make sure we are connected */
                        if (debug_lvl)
                                printk(KERN_DEBUG "%s: not connected "
                                       "(%d pending)\n",
                                       current->comm,
                                       atomic_read(&blkif->io_pending));
                        wait_event_interruptible(
                                blkif->wq,
                                (blkif->status == CONNECTED ||
                                 kthread_should_stop()));
                        continue;
                }

                /* Schedule I/O */
                atomic_set(&blkif->io_pending, 0);
                if (do_block_io_op(blkif))
                        atomic_inc(&blkif->io_pending);
                unplug_queue(blkif);

                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
        }

        if (log_stats)
                print_stats(blkif);
        if (debug_lvl)
                printk(KERN_DEBUG "%s: exiting\n", current->comm);
        blkif->xenblkd = NULL;
        blkif_put(blkif);
        return 0;
}

/******************************************************************
 * COMPLETION CALLBACK -- Called as bio->bi_end_io()
 */

static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
        /* An error fails the entire request. */
        if (!uptodate) {
                DPRINTK("Buffer not up-to-date at end of operation\n");
                pending_req->status = BLKIF_RSP_ERROR;
        }

        if (atomic_dec_and_test(&pending_req->pendcnt)) {
                fast_flush_area(pending_req);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                blkif_put(pending_req->blkif);
                free_req(pending_req);
        }
}

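/*
 * bio completion handler (2.6-era bi_end_io signature): a non-zero bi_size
 * means the bio is only partially complete, so keep waiting; otherwise fold
 * the result into the owning pending_req and drop our bio reference.
 */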
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
        if (bio->bi_size != 0)
                return 1;
        __end_block_io_op(bio->bi_private, !error);
        bio_put(bio);
        return error;
}


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
        blkif_t *blkif = dev_id;

        atomic_inc(&blkif->io_pending);
        wake_up(&blkif->wq);
        return IRQ_HANDLED;
}


/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */

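/*
 * Pull requests off the shared ring until it is empty or we run out of
 * pending_reqs; returns non-zero if requests remain unconsumed so the
 * caller re-arms io_pending and tries again.
 */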
static int do_block_io_op(blkif_t *blkif)
{
        blkif_back_ring_t *blk_ring = &blkif->blk_ring;
        blkif_request_t *req;
        pending_req_t *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0;

        rc = blk_ring->req_cons;
        rp = blk_ring->sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */

        while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {

                pending_req = alloc_req();
                if (NULL == pending_req) {
                        blkif->st_oo_req++;
                        more_to_do = 1;
                        break;
                }

                req = RING_GET_REQUEST(blk_ring, rc);
                blk_ring->req_cons = ++rc; /* before make_response() */

                switch (req->operation) {
                case BLKIF_OP_READ:
                        blkif->st_rd_req++;
                        dispatch_rw_block_io(blkif, req, pending_req);
                        break;
                case BLKIF_OP_WRITE:
                        blkif->st_wr_req++;
                        dispatch_rw_block_io(blkif, req, pending_req);
                        break;
                default:
                        DPRINTK("error: unknown block io operation [%d]\n",
                                req->operation);
                        make_response(blkif, req->id, req->operation,
                                      BLKIF_RSP_ERROR);
                        free_req(pending_req);
                        break;
                }
        }
        return more_to_do;
}

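/*
 * Map the request's grant references into our address space, validate the
 * request against the virtual block device, then build and submit bios for
 * the underlying physical device.  Any failure unmaps whatever was mapped
 * and queues an error response.
 */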
static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
                                 pending_req_t *pending_req)
{
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct phys_req preq;
        struct {
                unsigned long buf; unsigned int nsec;
        } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int nseg;
        struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int ret, i, nbio = 0;

        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
        if (unlikely(nseg == 0) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                DPRINTK("Bad number of segments in request (%d)\n", nseg);
                goto fail_response;
        }

        preq.dev           = req->handle;
        preq.sector_number = req->sector_number;
        preq.nr_sects      = 0;

        pending_req->blkif     = blkif;
        pending_req->id        = req->id;
        pending_req->operation = operation;
        pending_req->status    = BLKIF_RSP_OKAY;
        pending_req->nr_pages  = nseg;

        for (i = 0; i < nseg; i++) {
                seg[i].nsec = req->seg[i].last_sect -
                        req->seg[i].first_sect + 1;

                if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
                    (seg[i].nsec <= 0))
                        goto fail_response;
                preq.nr_sects += seg[i].nsec;

                map[i].host_addr = vaddr(pending_req, i);
                map[i].dom = blkif->domid;
                map[i].ref = req->seg[i].gref;
                map[i].flags = GNTMAP_host_map;
                if ( operation == WRITE )
                        map[i].flags |= GNTMAP_readonly;
        }

        ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
        BUG_ON(ret);

        for (i = 0; i < nseg; i++) {
                if (unlikely(map[i].status != 0)) {
                        DPRINTK("invalid buffer -- could not remap it\n");
                        goto fail_flush;
                }

                pending_handle(pending_req, i) = map[i].handle;
#ifdef __ia64__
                pending_vaddrs[vaddr_pagenr(pending_req, i)] =
                        (unsigned long)gnttab_map_vaddr(map[i]);
#else
                set_phys_to_machine(__pa(vaddr(
                        pending_req, i)) >> PAGE_SHIFT,
                        FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
#endif
                seg[i].buf = map[i].dev_bus_addr |
                        (req->seg[i].first_sect << 9);
        }

        if (vbd_translate(&preq, blkif, operation) != 0) {
                DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
                        operation == READ ? "read" : "write",
                        preq.sector_number,
                        preq.sector_number + preq.nr_sects, preq.dev);
                goto fail_flush;
        }

        for (i = 0; i < nseg; i++) {
                if (((int)preq.sector_number|(int)seg[i].nsec) &
                    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
                        DPRINTK("Misaligned I/O request from domain %d",
                                blkif->domid);
                        goto fail_put_bio;
                }

                while ((bio == NULL) ||
                       (bio_add_page(bio,
                                     virt_to_page(vaddr(pending_req, i)),
                                     seg[i].nsec << 9,
                                     seg[i].buf & ~PAGE_MASK) == 0)) {
                        bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;

                        bio->bi_bdev    = preq.bdev;
                        bio->bi_private = pending_req;
                        bio->bi_end_io  = end_block_io_op;
                        bio->bi_sector  = preq.sector_number;
                }

                preq.sector_number += seg[i].nsec;
        }

        plug_queue(blkif, bio);
        atomic_set(&pending_req->pendcnt, nbio);
        blkif_get(blkif);

        for (i = 0; i < nbio; i++)
                submit_bio(operation, biolist[i]);

        return;

 fail_put_bio:
        for (i = 0; i < (nbio-1); i++)
                bio_put(biolist[i]);
 fail_flush:
        fast_flush_area(pending_req);
 fail_response:
        make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
        free_req(pending_req);
}


/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */

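/*
 * Queue a response for 'id' on the domain's ring, notify the frontend via
 * its event channel if it asked for a notification, and kick our own thread
 * again if more requests are already waiting on the ring.
 */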
static void make_response(blkif_t *blkif, unsigned long id,
                          unsigned short op, int st)
{
        blkif_response_t *resp;
        unsigned long flags;
        blkif_back_ring_t *blk_ring = &blkif->blk_ring;
        int more_to_do = 0;
        int notify;

        spin_lock_irqsave(&blkif->blk_ring_lock, flags);

        /* Place on the response ring for the relevant domain. */
        resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
        resp->id        = id;
        resp->operation = op;
        resp->status    = st;
        blk_ring->rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);

        if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
                /*
                 * Tail check for pending requests. Allows frontend to avoid
                 * notifications if requests are already in flight (lower
                 * overheads and promotes batching).
                 */
                RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);

        } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
                more_to_do = 1;

        }
        spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

        if (more_to_do) {
                atomic_inc(&blkif->io_pending);
                wake_up(&blkif->wq);
        }
        if (notify)
                notify_remote_via_irq(blkif->irq);
}

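/*
 * Module initialisation: allocate the pending_req pool and the per-segment
 * grant-handle/vaddr bookkeeping arrays, reserve an empty page range to map
 * foreign frames into, and populate the free list.
 */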
static int __init blkif_init(void)
{
        struct page *page;
        int i;

        if (xen_init() < 0)
                return -ENODEV;

        mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
        pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
                                        blkif_reqs, GFP_KERNEL);
        pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
                                        mmap_pages, GFP_KERNEL);
        if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
                kfree(pending_reqs);
                kfree(pending_grant_handles);
                kfree(pending_vaddrs);
                printk("%s: out of memory\n", __FUNCTION__);
                return -ENOMEM;
        }

        blkif_interface_init();

#ifdef __ia64__
        extern unsigned long alloc_empty_foreign_map_page_range(
                unsigned long pages);
        mmap_vstart = (unsigned long)
                alloc_empty_foreign_map_page_range(mmap_pages);
#else /* ! ia64 */
        page = balloon_alloc_empty_page_range(mmap_pages);
        BUG_ON(page == NULL);
        mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
#endif
        printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
               __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
        BUG_ON(mmap_vstart == 0);
        for (i = 0; i < mmap_pages; i++) {
                pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
                pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
        }

        memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
        INIT_LIST_HEAD(&pending_free);

        for (i = 0; i < blkif_reqs; i++)
                list_add_tail(&pending_reqs[i].free_list, &pending_free);

        blkif_xenbus_init();
        return 0;
}

__initcall(blkif_init);

/*
 * Local variables:
 * c-file-style: "linux"
 * indent-tabs-mode: t
 * c-indent-level: 8
 * c-basic-offset: 8
 * tab-width: 8
 * End:
 */