ia64/xen-unstable: linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
changeset 10038:60f7b567bb2b

Simply do not declare module_exit() handlers for netback/blkback, rather
than declaring the modules unsafe.

Signed-off-by: Keir Fraser <keir@xensource.com>

Author:   kaf24@firebug.cl.cam.ac.uk
Date:     Wed May 10 16:47:00 2006 +0100
Parents:  9ffa49fe58ab
Children: 48c0f5489d44
/******************************************************************************
 * arch/xen/drivers/blkif/backend/main.c
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  arch/xen/drivers/blkif/frontend
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <xen/balloon.h>
#include <asm/hypervisor.h>
#include "common.h"
/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
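/*
 * Sizing note: blkif_init() reserves blkif_reqs *
 * BLKIF_MAX_SEGMENTS_PER_REQUEST pending pages for grant mappings, so with
 * the default of 64 requests and the interface's usual limit of 11 segments
 * per request that is 704 mapping slots. The 'reqs' parameter uses
 * permission 0, so it can only be set when the module is loaded, unlike the
 * run-time switchable parameters below.
 */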
static int mmap_pages;

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);
/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
	blkif_t       *blkif;
	unsigned long  id;
	int            nr_pages;
	atomic_t       pendcnt;
	unsigned short operation;
	int            status;
	struct list_head free_list;
} pending_req_t;
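/*
 * Lifecycle of a pending_req_t, as implemented below: blkif_schedule()
 * pulls one off 'pending_free' via alloc_req(), dispatch_rw_block_io()
 * grant-maps the request's segments into the slots owned by that entry and
 * submits one or more bios, each completing bio drops 'pendcnt' in
 * __end_block_io_op(), and the final one unmaps the grants, queues the
 * response and returns the entry to the free list via free_req().
 */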
static pending_req_t *pending_reqs;
static struct list_head pending_free;
static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);

#define BLKBACK_INVALID_HANDLE (~0)

static unsigned long mmap_vstart;
static unsigned long *pending_vaddrs;
static grant_handle_t *pending_grant_handles;

static inline int vaddr_pagenr(pending_req_t *req, int seg)
{
	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

static inline unsigned long vaddr(pending_req_t *req, int seg)
{
	return pending_vaddrs[vaddr_pagenr(req, seg)];
}

#define pending_handle(_req, _seg) \
	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
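/*
 * Example of the indexing above (values assumed for illustration): with
 * BLKIF_MAX_SEGMENTS_PER_REQUEST == 11, segment 2 of pending_reqs[3] lives
 * in slot 3 * 11 + 2 == 35 of pending_vaddrs[] and pending_grant_handles[],
 * i.e. every request owns a fixed, contiguous run of mapping slots.
 */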
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
 * If the tap driver is used, we may get pages belonging to either the tap
 * or (more likely) the real frontend. The backend must specify which domain
 * a given page belongs to in update_va_mapping though. For the moment,
 * the tap rewrites the ID field of the request to contain the request index
 * and the id of the real front end domain.
 */
#define BLKTAP_COOKIE 0xbeadfeed
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
#endif

static int do_block_io_op(blkif_t *blkif);
static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id,
			  unsigned short op, int st);

/******************************************************************
 * misc small helpers
 */
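/*
 * Take a pending_req_t off the free pool, or return NULL if the pool is
 * empty; do_block_io_op() treats NULL as an out-of-descriptors event and
 * retries the request later.
 */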
static pending_req_t* alloc_req(void)
{
	pending_req_t *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);
	if (!list_empty(&pending_free)) {
		req = list_entry(pending_free.next, pending_req_t, free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&pending_free_lock, flags);
	return req;
}
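/*
 * Return a pending_req_t to the free pool; if the pool was empty, wake any
 * blkif_schedule() thread sleeping on pending_free_wq.
 */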
static void free_req(pending_req_t *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&pending_free_lock, flags);
	was_empty = list_empty(&pending_free);
	list_add(&req->free_list, &pending_free);
	spin_unlock_irqrestore(&pending_free_lock, flags);
	if (was_empty)
		wake_up(&pending_free_wq);
}
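/*
 * Request-queue plugging: blkif->plug caches the queue of the device the
 * last bio was sent to. plug_queue() switches to a new queue (unplugging
 * the old one first), and unplug_queue() is called once a batch of ring
 * requests has been dispatched so the device starts working on them.
 */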
static void unplug_queue(blkif_t *blkif)
{
	if (blkif->plug == NULL)
		return;
	if (blkif->plug->unplug_fn)
		blkif->plug->unplug_fn(blkif->plug);
	blk_put_queue(blkif->plug);
	blkif->plug = NULL;
}

static void plug_queue(blkif_t *blkif, struct bio *bio)
{
	request_queue_t *q = bdev_get_queue(bio->bi_bdev);

	if (q == blkif->plug)
		return;
	unplug_queue(blkif);
	blk_get_queue(q);
	blkif->plug = q;
}
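/*
 * Tear down the grant mappings of a finished (or failed) request with a
 * single batched GNTTABOP_unmap_grant_ref hypercall, and mark the slots
 * invalid again.
 */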
static void fast_flush_area(pending_req_t *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		/* Index by invcount, not i, so the batch stays dense even if
		 * some slots were never mapped. */
		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
				    GNTMAP_host_map, handle);
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
}
/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static void print_stats(blkif_t *blkif)
{
	printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}
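/*
 * Per-interface kernel thread. It sleeps until the frontend has posted
 * requests and a free pending_req_t is available, drains the ring via
 * do_block_io_op(), unplugs the underlying queue, and optionally prints
 * statistics every ten seconds when log_stats is set.
 */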
int blkif_schedule(void *arg)
{
	blkif_t *blkif = arg;

	blkif_get(blkif);

	if (debug_lvl)
		printk(KERN_DEBUG "%s: started\n", current->comm);

	while (!kthread_should_stop()) {
		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			pending_free_wq,
			!list_empty(&pending_free) || kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;
		unplug_queue(blkif);

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);
	if (debug_lvl)
		printk(KERN_DEBUG "%s: exiting\n", current->comm);

	blkif->xenblkd = NULL;
	blkif_put(blkif);

	return 0;
}
/******************************************************************
 * COMPLETION CALLBACK -- Called as bio->bi_end_io()
 */

static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
	/* An error fails the entire request. */
	if (!uptodate) {
		DPRINTK("Buffer not up-to-date at end of operation\n");
		pending_req->status = BLKIF_RSP_ERROR;
	}

	if (atomic_dec_and_test(&pending_req->pendcnt)) {
		fast_flush_area(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
}
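/*
 * bi_end_io() in this kernel generation may be invoked for partial
 * completions; a non-zero bi_size means more of the bio is still in flight,
 * so we return 1 and wait for the final call.
 */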
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
	if (bio->bi_size != 0)
		return 1;
	__end_block_io_op(bio->bi_private, !error);
	bio_put(bio);
	return error;
}
/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

static void blkif_notify_work(blkif_t *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}
/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */
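/*
 * Consume requests from the shared ring: snapshot req_prod, issue a read
 * barrier, then dispatch each request until the ring is empty, the overflow
 * check trips, or the pending_req pool runs dry. Returns non-zero if work
 * remains so the caller re-marks waiting_reqs.
 */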
static int do_block_io_op(blkif_t *blkif)
{
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	blkif_request_t *req;
	pending_req_t *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_ring->req_cons;
	rp = blk_ring->sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		req = RING_GET_REQUEST(blk_ring, rc);
		blk_ring->req_cons = ++rc; /* before make_response() */

		switch (req->operation) {
		case BLKIF_OP_READ:
			blkif->st_rd_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;
		case BLKIF_OP_WRITE:
			blkif->st_wr_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;
		default:
			DPRINTK("error: unknown block io operation [%d]\n",
				req->operation);
			make_response(blkif, req->id, req->operation,
				      BLKIF_RSP_ERROR);
			free_req(pending_req);
			break;
		}
	}
	return more_to_do;
}
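/*
 * Dispatch one read/write request: validate the segment count, grant-map
 * all segments with a single GNTTABOP_map_grant_ref batch (write requests
 * are mapped read-only, since the backend only reads the guest's buffer),
 * translate the virtual device extent via vbd_translate(), then build and
 * submit bios, starting a new bio whenever bio_add_page() refuses to extend
 * the current one.
 */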
static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req)
{
	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
	int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct phys_req preq;
	struct {
		unsigned long buf; unsigned int nsec;
	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg;
	struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int ret, i, nbio = 0;

	/* Check that number of segments is sane. */
	nseg = req->nr_segments;
	if (unlikely(nseg == 0) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		DPRINTK("Bad number of segments in request (%d)\n", nseg);
		goto fail_response;
	}

	preq.dev           = req->handle;
	preq.sector_number = req->sector_number;
	preq.nr_sects      = 0;

	pending_req->blkif     = blkif;
	pending_req->id        = req->id;
	pending_req->operation = operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;

	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		seg[i].nsec = req->seg[i].last_sect -
			req->seg[i].first_sect + 1;

		if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
		    (seg[i].nsec <= 0))
			goto fail_response;
		preq.nr_sects += seg[i].nsec;

		flags = GNTMAP_host_map;
		if (operation == WRITE)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->seg[i].gref, blkif->domid);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	BUG_ON(ret);

	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			DPRINTK("invalid buffer -- could not remap it\n");
			goto fail_flush;
		}

		pending_handle(pending_req, i) = map[i].handle;
#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
		pending_vaddrs[vaddr_pagenr(pending_req, i)] =
			(unsigned long)gnttab_map_vaddr(map[i]);
#else
		set_phys_to_machine(__pa(vaddr(
			pending_req, i)) >> PAGE_SHIFT,
			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
#endif
		seg[i].buf = map[i].dev_bus_addr |
			(req->seg[i].first_sect << 9);
	}

	if (vbd_translate(&preq, blkif, operation) != 0) {
		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
			operation == READ ? "read" : "write",
			preq.sector_number,
			preq.sector_number + preq.nr_sects, preq.dev);
		goto fail_flush;
	}

	for (i = 0; i < nseg; i++) {
		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
			DPRINTK("Misaligned I/O request from domain %d",
				blkif->domid);
			goto fail_put_bio;
		}

		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     virt_to_page(vaddr(pending_req, i)),
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {
			bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
			if (unlikely(bio == NULL))
				goto fail_put_bio;

			bio->bi_bdev    = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io  = end_block_io_op;
			bio->bi_sector  = preq.sector_number;
		}

		preq.sector_number += seg[i].nsec;
	}

	plug_queue(blkif, bio);
	atomic_set(&pending_req->pendcnt, nbio);
	blkif_get(blkif);

	for (i = 0; i < nbio; i++)
		submit_bio(operation, biolist[i]);

	return;

 fail_put_bio:
	for (i = 0; i < (nbio-1); i++)
		bio_put(biolist[i]);
 fail_flush:
	fast_flush_area(pending_req);
 fail_response:
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
}
/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */
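/*
 * Queue a response on the shared ring under blk_ring_lock, push it with the
 * notify check, then perform the tail check for further requests so the
 * frontend can skip needless event-channel notifications.
 */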
static void make_response(blkif_t *blkif, unsigned long id,
			  unsigned short op, int st)
{
	blkif_response_t *resp;
	unsigned long flags;
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	int more_to_do = 0;
	int notify;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);

	/* Place on the response ring for the relevant domain. */
	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
	resp->id        = id;
	resp->operation = op;
	resp->status    = st;
	blk_ring->rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);

	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
		more_to_do = 1;
	}
	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}
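/*
 * Module initialisation: allocate the pending_req pool and the parallel
 * per-segment vaddr/handle arrays, reserve an empty virtual address range
 * for grant mappings (a balloon-backed page range on x86, a foreign-map
 * range on old-style ia64), seed the free list, and register the xenbus
 * backend. There is deliberately no module_exit() counterpart.
 */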
static int __init blkif_init(void)
{
	struct page *page;
	int i;

	if (xen_init() < 0)
		return -ENODEV;

	mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
	pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
					blkif_reqs, GFP_KERNEL);
	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
					mmap_pages, GFP_KERNEL);
	pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
					mmap_pages, GFP_KERNEL);
	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
		kfree(pending_reqs);
		kfree(pending_grant_handles);
		kfree(pending_vaddrs);
		printk(KERN_ERR "%s: out of memory\n", __FUNCTION__);
		return -ENOMEM;
	}

	blkif_interface_init();

#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
	extern unsigned long alloc_empty_foreign_map_page_range(
		unsigned long pages);
	mmap_vstart = (unsigned long)
		alloc_empty_foreign_map_page_range(mmap_pages);
#else /* ! ia64 */
	page = balloon_alloc_empty_page_range(mmap_pages);
	BUG_ON(page == NULL);
	mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
#endif
	printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
	       __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
	BUG_ON(mmap_vstart == 0);

	for (i = 0; i < mmap_pages; i++) {
		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
	}

	/* Zero the whole pool, not just sizeof(pointer) bytes. */
	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
	INIT_LIST_HEAD(&pending_free);

	for (i = 0; i < blkif_reqs; i++)
		list_add_tail(&pending_reqs[i].free_list, &pending_free);

	blkif_xenbus_init();

	return 0;
}
module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");
/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */